Diffstat (limited to 'src')
-rw-r--r--  src/CMakeLists.txt | 1
-rw-r--r--  src/common/CMakeLists.txt | 2
-rw-r--r--  src/common/dynamic_library.cpp | 106
-rw-r--r--  src/common/dynamic_library.h | 75
-rw-r--r--  src/common/file_util.cpp | 22
-rw-r--r--  src/common/thread.cpp | 9
-rw-r--r--  src/core/file_sys/patch_manager.cpp | 6
-rw-r--r--  src/core/file_sys/romfs.cpp | 3
-rw-r--r--  src/core/frontend/emu_window.h | 41
-rw-r--r--  src/core/hle/kernel/kernel.cpp | 2
-rw-r--r--  src/core/hle/service/friend/friend.cpp | 11
-rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.cpp | 48
-rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.h | 2
-rw-r--r--  src/core/hle/service/vi/vi.cpp | 46
-rw-r--r--  src/core/memory.cpp | 127
-rw-r--r--  src/core/memory.h | 78
-rw-r--r--  src/video_core/CMakeLists.txt | 2
-rw-r--r--  src/video_core/buffer_cache/buffer_block.h | 42
-rw-r--r--  src/video_core/buffer_cache/buffer_cache.h | 145
-rw-r--r--  src/video_core/buffer_cache/map_interval.h | 12
-rw-r--r--  src/video_core/engines/maxwell_3d.h | 81
-rw-r--r--  src/video_core/engines/shader_bytecode.h | 71
-rw-r--r--  src/video_core/engines/shader_header.h | 55
-rw-r--r--  src/video_core/gpu.h | 6
-rw-r--r--  src/video_core/gpu_asynch.cpp | 11
-rw-r--r--  src/video_core/gpu_asynch.h | 6
-rw-r--r--  src/video_core/gpu_synch.cpp | 6
-rw-r--r--  src/video_core/gpu_synch.h | 6
-rw-r--r--  src/video_core/gpu_thread.cpp | 6
-rw-r--r--  src/video_core/gpu_thread.h | 18
-rw-r--r--  src/video_core/memory_manager.cpp | 93
-rw-r--r--  src/video_core/memory_manager.h | 5
-rw-r--r--  src/video_core/query_cache.h | 37
-rw-r--r--  src/video_core/rasterizer_cache.h | 44
-rw-r--r--  src/video_core/rasterizer_interface.h | 6
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.cpp | 8
-rw-r--r--  src/video_core/renderer_opengl/gl_buffer_cache.h | 4
-rw-r--r--  src/video_core/renderer_opengl/gl_device.cpp | 26
-rw-r--r--  src/video_core/renderer_opengl/gl_device.h | 5
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.cpp | 42
-rw-r--r--  src/video_core/renderer_opengl/gl_rasterizer.h | 9
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp | 50
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.h | 8
-rw-r--r--  src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 48
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.cpp | 7
-rw-r--r--  src/video_core/renderer_opengl/gl_state_tracker.h | 1
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp | 215
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.h | 6
-rw-r--r--  src/video_core/renderer_opengl/renderer_opengl.cpp | 4
-rw-r--r--  src/video_core/renderer_vulkan/declarations.h | 58
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 533
-rw-r--r--  src/video_core/renderer_vulkan/maxwell_to_vk.h | 37
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.cpp | 291
-rw-r--r--  src/video_core/renderer_vulkan/renderer_vulkan.h | 19
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp | 625
-rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.h | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 157
-rw-r--r--  src/video_core/renderer_vulkan/vk_buffer_cache.h | 16
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.cpp | 241
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pass.h | 29
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | 129
-rw-r--r--  src/video_core/renderer_vulkan/vk_compute_pipeline.h | 30
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | 92
-rw-r--r--  src/video_core/renderer_vulkan/vk_descriptor_pool.h | 19
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.cpp | 575
-rw-r--r--  src/video_core/renderer_vulkan/vk_device.h | 83
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 378
-rw-r--r--  src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 38
-rw-r--r--  src/video_core/renderer_vulkan/vk_image.cpp | 61
-rw-r--r--  src/video_core/renderer_vulkan/vk_image.h | 40
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.cpp | 104
-rw-r--r--  src/video_core/renderer_vulkan/vk_memory_manager.h | 34
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 131
-rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.h | 13
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.cpp | 56
-rw-r--r--  src/video_core/renderer_vulkan/vk_query_cache.h | 14
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.cpp | 309
-rw-r--r--  src/video_core/renderer_vulkan/vk_rasterizer.h | 21
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | 120
-rw-r--r--  src/video_core/renderer_vulkan/vk_renderpass_cache.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_manager.cpp | 87
-rw-r--r--  src/video_core/renderer_vulkan/vk_resource_manager.h | 10
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.cpp | 76
-rw-r--r--  src/video_core/renderer_vulkan/vk_sampler_cache.h | 8
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.cpp | 87
-rw-r--r--  src/video_core/renderer_vulkan/vk_scheduler.h | 29
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 19
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.cpp | 20
-rw-r--r--  src/video_core/renderer_vulkan/vk_shader_util.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 25
-rw-r--r--  src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 4
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.cpp | 64
-rw-r--r--  src/video_core/renderer_vulkan/vk_stream_buffer.h | 18
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.cpp | 155
-rw-r--r--  src/video_core/renderer_vulkan/vk_swapchain.h | 32
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp | 380
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.h | 64
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.cpp | 18
-rw-r--r--  src/video_core/renderer_vulkan/vk_update_descriptor.h | 42
-rw-r--r--  src/video_core/shader/decode/arithmetic.cpp | 3
-rw-r--r--  src/video_core/shader/decode/conversion.cpp | 113
-rw-r--r--  src/video_core/shader/decode/image.cpp | 360
-rw-r--r--  src/video_core/shader/decode/other.cpp | 48
-rw-r--r--  src/video_core/shader/decode/texture.cpp | 14
-rw-r--r--  src/video_core/shader/decode/video.cpp | 58
-rw-r--r--  src/video_core/shader/shader_ir.cpp | 3
-rw-r--r--  src/video_core/shader/shader_ir.h | 7
-rw-r--r--  src/video_core/surface.h | 97
-rw-r--r--  src/video_core/texture_cache/surface_base.cpp | 81
-rw-r--r--  src/video_core/texture_cache/surface_base.h | 49
-rw-r--r--  src/video_core/texture_cache/surface_params.cpp | 34
-rw-r--r--  src/video_core/texture_cache/surface_params.h | 36
-rw-r--r--  src/video_core/texture_cache/texture_cache.h | 181
-rw-r--r--  src/video_core/textures/astc.cpp | 241
-rw-r--r--  src/video_core/textures/texture.cpp | 80
-rw-r--r--  src/video_core/textures/texture.h | 46
-rw-r--r--  src/yuzu/CMakeLists.txt | 4
-rw-r--r--  src/yuzu/about_dialog.cpp | 9
-rw-r--r--  src/yuzu/bootmanager.cpp | 93
-rw-r--r--  src/yuzu/bootmanager.h | 9
-rw-r--r--  src/yuzu/configuration/configure_graphics.cpp | 42
-rw-r--r--  src/yuzu/configuration/configure_input_player.cpp | 15
-rw-r--r--  src/yuzu/configuration/configure_input_player.ui | 4
-rw-r--r--  src/yuzu/configuration/configure_input_simple.cpp | 9
-rw-r--r--  src/yuzu/configuration/configure_mouse_advanced.cpp | 15
-rw-r--r--  src/yuzu/game_list.cpp | 6
-rw-r--r--  src/yuzu/game_list_p.h | 24
-rw-r--r--  src/yuzu/main.cpp | 8
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | 6
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h | 4
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp | 159
-rw-r--r--  src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h | 18
-rw-r--r--  src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp | 4
-rw-r--r--  src/yuzu_tester/emu_window/emu_window_sdl2_hide.h | 4
134 files changed, 5435 insertions(+), 3550 deletions(-)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9d0af02fd..e40e9b0a5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,6 +53,7 @@ if (MSVC)
 else()
     add_compile_options(
         -Wall
+        -Werror=reorder
         -Wno-attributes
     )
 
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index fbebed715..eeceaa655 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -106,6 +106,8 @@ add_library(common STATIC
     common_funcs.h
     common_paths.h
     common_types.h
+    dynamic_library.cpp
+    dynamic_library.h
     file_util.cpp
     file_util.h
     hash.h
diff --git a/src/common/dynamic_library.cpp b/src/common/dynamic_library.cpp
new file mode 100644
index 000000000..7ab54e9e4
--- /dev/null
+++ b/src/common/dynamic_library.cpp
@@ -0,0 +1,106 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <string>
+#include <utility>
+
+#include <fmt/format.h>
+
+#include "common/dynamic_library.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+namespace Common {
+
+DynamicLibrary::DynamicLibrary() = default;
+
+DynamicLibrary::DynamicLibrary(const char* filename) {
+    Open(filename);
+}
+
+DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept
+    : handle{std::exchange(rhs.handle, nullptr)} {}
+
+DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept {
+    Close();
+    handle = std::exchange(rhs.handle, nullptr);
+    return *this;
+}
+
+DynamicLibrary::~DynamicLibrary() {
+    Close();
+}
+
+std::string DynamicLibrary::GetUnprefixedFilename(const char* filename) {
+#if defined(_WIN32)
+    return std::string(filename) + ".dll";
+#elif defined(__APPLE__)
+    return std::string(filename) + ".dylib";
+#else
+    return std::string(filename) + ".so";
+#endif
+}
+
+std::string DynamicLibrary::GetVersionedFilename(const char* libname, int major, int minor) {
+#if defined(_WIN32)
+    if (major >= 0 && minor >= 0)
+        return fmt::format("{}-{}-{}.dll", libname, major, minor);
+    else if (major >= 0)
+        return fmt::format("{}-{}.dll", libname, major);
+    else
+        return fmt::format("{}.dll", libname);
+#elif defined(__APPLE__)
+    const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
+    if (major >= 0 && minor >= 0)
+        return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor);
+    else if (major >= 0)
+        return fmt::format("{}{}.{}.dylib", prefix, libname, major);
+    else
+        return fmt::format("{}{}.dylib", prefix, libname);
+#else
+    const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
+    if (major >= 0 && minor >= 0)
+        return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor);
+    else if (major >= 0)
+        return fmt::format("{}{}.so.{}", prefix, libname, major);
+    else
+        return fmt::format("{}{}.so", prefix, libname);
+#endif
+}
+
+bool DynamicLibrary::Open(const char* filename) {
+#ifdef _WIN32
+    handle = reinterpret_cast<void*>(LoadLibraryA(filename));
+#else
+    handle = dlopen(filename, RTLD_NOW);
+#endif
+    return handle != nullptr;
+}
+
+void DynamicLibrary::Close() {
+    if (!IsOpen())
+        return;
+
+#ifdef _WIN32
+    FreeLibrary(reinterpret_cast<HMODULE>(handle));
+#else
+    dlclose(handle);
+#endif
+    handle = nullptr;
+}
+
+void* DynamicLibrary::GetSymbolAddress(const char* name) const {
+#ifdef _WIN32
+    return reinterpret_cast<void*>(GetProcAddress(reinterpret_cast<HMODULE>(handle), name));
+#else
+    return reinterpret_cast<void*>(dlsym(handle, name));
+#endif
+}
+
+} // namespace Common
diff --git a/src/common/dynamic_library.h b/src/common/dynamic_library.h
new file mode 100644
index 000000000..2a06372fd
--- /dev/null
+++ b/src/common/dynamic_library.h
@@ -0,0 +1,75 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+namespace Common {
+
+/**
+ * Provides a platform-independent interface for loading a dynamic library and retrieving symbols.
+ * The interface maintains an internal reference count to allow one handle to be shared between
+ * multiple users.
+ */
+class DynamicLibrary final {
+public:
+    /// Default constructor, does not load a library.
+    explicit DynamicLibrary();
+
+    /// Automatically loads the specified library. Call IsOpen() to check validity before use.
+    explicit DynamicLibrary(const char* filename);
+
+    /// Moves the library.
+    DynamicLibrary(DynamicLibrary&&) noexcept;
+    DynamicLibrary& operator=(DynamicLibrary&&) noexcept;
+
+    /// Delete copies, we can't copy a dynamic library.
+    DynamicLibrary(const DynamicLibrary&) = delete;
+    DynamicLibrary& operator=(const DynamicLibrary&) = delete;
+
+    /// Closes the library.
+    ~DynamicLibrary();
+
+    /// Returns the specified library name with the platform-specific suffix added.
+    static std::string GetUnprefixedFilename(const char* filename);
+
+    /// Returns the specified library name in platform-specific format.
+    /// Major/minor versions will not be included if set to -1.
+    /// If libname already contains the "lib" prefix, it will not be added again.
+    /// Windows: LIBNAME-MAJOR-MINOR.dll
+    /// Linux: libLIBNAME.so.MAJOR.MINOR
+    /// Mac: libLIBNAME.MAJOR.MINOR.dylib
+    static std::string GetVersionedFilename(const char* libname, int major = -1, int minor = -1);
+
+    /// Returns true if a module is loaded, otherwise false.
+    bool IsOpen() const {
+        return handle != nullptr;
+    }
+
+    /// Loads (or replaces) the handle with the specified library file name.
+    /// Returns true if the library was loaded and can be used.
+    bool Open(const char* filename);
+
+    /// Unloads the library, any function pointers from this library are no longer valid.
+    void Close();
+
+    /// Returns the address of the specified symbol (function or variable) as an untyped pointer.
+    /// If the specified symbol does not exist in this library, nullptr is returned.
+    void* GetSymbolAddress(const char* name) const;
+
+    /// Obtains the address of the specified symbol, automatically casting to the correct type.
+    /// Returns true if the symbol was found and assigned, otherwise false.
+    template <typename T>
+    bool GetSymbol(const char* name, T* ptr) const {
+        *ptr = reinterpret_cast<T>(GetSymbolAddress(name));
+        return *ptr != nullptr;
+    }
+
+private:
+    /// Platform-dependent data type representing a dynamic library handle.
+    void* handle = nullptr;
+};
+
+} // namespace Common
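
A brief usage sketch for the new class (the Vulkan loader name and symbol below are purely illustrative; nothing in Common::DynamicLibrary is Vulkan-specific):

    #include "common/dynamic_library.h"

    // GetVersionedFilename("vulkan", 1) yields "vulkan-1.dll" on Windows,
    // "libvulkan.so.1" on Linux, and "libvulkan.1.dylib" on macOS.
    const std::string name = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);

    Common::DynamicLibrary library(name.c_str());
    if (library.IsOpen()) {
        // GetSymbol() casts the raw symbol address to the pointer type of fp.
        void (*fp)() = nullptr;
        if (library.GetSymbol("vkGetInstanceProcAddr", &fp)) {
            // fp remains valid until Close() or the destructor runs.
        }
    }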
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 41167f57a..35eee0096 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <array>
+#include <limits>
 #include <memory>
 #include <sstream>
 #include <unordered_map>
@@ -530,11 +531,11 @@ void CopyDir(const std::string& source_path, const std::string& dest_path) {
 std::optional<std::string> GetCurrentDir() {
     // Get the current working directory (getcwd uses malloc)
 #ifdef _WIN32
-    wchar_t* dir;
-    if (!(dir = _wgetcwd(nullptr, 0))) {
+    wchar_t* dir = _wgetcwd(nullptr, 0);
+    if (!dir) {
 #else
-    char* dir;
-    if (!(dir = getcwd(nullptr, 0))) {
+    char* dir = getcwd(nullptr, 0);
+    if (!dir) {
 #endif
         LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg());
         return {};
@@ -918,19 +919,22 @@ void IOFile::Swap(IOFile& other) noexcept {
 
 bool IOFile::Open(const std::string& filename, const char openmode[], int flags) {
     Close();
+    bool m_good;
 #ifdef _WIN32
     if (flags != 0) {
         m_file = _wfsopen(Common::UTF8ToUTF16W(filename).c_str(),
                           Common::UTF8ToUTF16W(openmode).c_str(), flags);
+        m_good = m_file != nullptr;
     } else {
-        _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
-                  Common::UTF8ToUTF16W(openmode).c_str());
+        m_good = _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
+                           Common::UTF8ToUTF16W(openmode).c_str()) == 0;
     }
 #else
-    m_file = fopen(filename.c_str(), openmode);
+    m_file = std::fopen(filename.c_str(), openmode);
+    m_good = m_file != nullptr;
 #endif
 
-    return IsOpen();
+    return m_good;
 }
 
 bool IOFile::Close() {
@@ -956,7 +960,7 @@ u64 IOFile::Tell() const {
     if (IsOpen())
         return ftello(m_file);
 
-    return -1;
+    return std::numeric_limits<u64>::max();
 }
 
 bool IOFile::Flush() {
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index fe7a420cc..0cd2d10bf 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -28,11 +28,8 @@ namespace Common {
 #ifdef _MSC_VER
 
 // Sets the debugger-visible name of the current thread.
-// Uses undocumented (actually, it is now documented) trick.
-// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
-
-// This is implemented much nicer in upcoming msvc++, see:
-// http://msdn.microsoft.com/en-us/library/xcb2z8hs(VS.100).aspx
+// Uses trick documented in:
+// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code
 void SetCurrentThreadName(const char* name) {
     static const DWORD MS_VC_EXCEPTION = 0x406D1388;
 
@@ -47,7 +44,7 @@ void SetCurrentThreadName(const char* name) {
 
     info.dwType = 0x1000;
     info.szName = name;
-    info.dwThreadID = -1; // dwThreadID;
+    info.dwThreadID = std::numeric_limits<DWORD>::max();
     info.dwFlags = 0;
 
     __try {
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index e226e9711..e77e82b8d 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -348,6 +348,12 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
         if (ext_dir != nullptr)
             layers_ext.push_back(std::move(ext_dir));
     }
+
+    // When there are no layers to apply, return early as there is no need to rebuild the RomFS
+    if (layers.empty() && layers_ext.empty()) {
+        return;
+    }
+
     layers.push_back(std::move(extracted));
 
     auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers));
diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp
index c909d1ce4..120032134 100644
--- a/src/core/file_sys/romfs.cpp
+++ b/src/core/file_sys/romfs.cpp
@@ -5,6 +5,7 @@
 #include <memory>
 
 #include "common/common_types.h"
+#include "common/string_util.h"
 #include "common/swap.h"
 #include "core/file_sys/fsmitm_romfsbuild.h"
 #include "core/file_sys/romfs.h"
@@ -126,7 +127,7 @@ VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) {
         return out->GetSubdirectories().front();
 
     while (out->GetSubdirectories().size() == 1 && out->GetFiles().empty()) {
-        if (out->GetSubdirectories().front()->GetName() == "data" &&
+        if (Common::ToLower(out->GetSubdirectories().front()->GetName()) == "data" &&
             type == RomFSExtractionType::Truncated)
             break;
         out = out->GetSubdirectories().front();
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 72294d4d8..13aa14934 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -12,6 +12,15 @@
 
 namespace Core::Frontend {
 
+/// Information for the graphics backends signifying what type of screen pointer is in
+/// WindowSystemInfo
+enum class WindowSystemType {
+    Headless,
+    Windows,
+    X11,
+    Wayland,
+};
+
 /**
  * Represents a drawing context that supports graphics operations.
  */
@@ -76,6 +85,23 @@ public:
         std::pair<unsigned, unsigned> min_client_area_size;
     };
 
+    /// Data describing host window system information
+    struct WindowSystemInfo {
+        // Window system type. Determines which GL context or Vulkan WSI is used.
+        WindowSystemType type = WindowSystemType::Headless;
+
+        // Connection to a display server. This is used on X11 and Wayland platforms.
+        void* display_connection = nullptr;
+
+        // Render surface. This is a pointer to the native window handle, which depends
+        // on the platform. e.g. HWND for Windows, Window for X11. If the surface is
+        // set to nullptr, the video backend will run in headless mode.
+        void* render_surface = nullptr;
+
+        // Scale of the render surface. For hidpi systems, this will be >1.
+        float render_surface_scale = 1.0f;
+    };
+
104
79 /// Polls window events 105 /// Polls window events
80 virtual void PollEvents() = 0; 106 virtual void PollEvents() = 0;
81 107
@@ -87,10 +113,6 @@ public:
 
     /// Returns if window is shown (not minimized)
     virtual bool IsShown() const = 0;
 
-    /// Retrieves Vulkan specific handlers from the window
-    virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
-                                        void* surface) const = 0;
-
     /**
      * Signal that a touch pressed event has occurred (e.g. mouse click pressed)
      * @param framebuffer_x Framebuffer x-coordinate that was pressed
@@ -128,6 +150,13 @@ public:
     }
 
     /**
+     * Returns system information about the drawing area.
+     */
+    const WindowSystemInfo& GetWindowInfo() const {
+        return window_info;
+    }
+
+    /**
      * Gets the framebuffer layout (width, height, and screen regions)
      * @note This method is thread-safe
      */
@@ -142,7 +171,7 @@ public:
     void UpdateCurrentFramebufferLayout(unsigned width, unsigned height);
 
 protected:
-    EmuWindow();
+    explicit EmuWindow();
     virtual ~EmuWindow();
 
     /**
@@ -179,6 +208,8 @@ protected:
         client_area_height = size.second;
     }
 
+    WindowSystemInfo window_info;
+
 private:
     /**
      * Handler called when the minimal client area was requested to be changed via SetConfig.
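
A sketch of how a frontend is expected to fill the new window_info member before handing the window to a video backend (the x11_display/x11_window handles shown are placeholders; a real frontend obtains them from its toolkit):

    // Inside a hypothetical X11 frontend's constructor:
    window_info.type = Core::Frontend::WindowSystemType::X11;
    window_info.display_connection = x11_display; // e.g. the Display* from XOpenDisplay
    window_info.render_surface = reinterpret_cast<void*>(x11_window); // the X11 Window id
    window_info.render_surface_scale = 1.0f; // >1 on hidpi systems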
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index e47f1deed..014d647cf 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -103,7 +103,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
 
 struct KernelCore::Impl {
     explicit Impl(Core::System& system, KernelCore& kernel)
-        : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}
+        : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
 
     void Initialize(KernelCore& kernel) {
         Shutdown();
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index 6aadb3ea8..7938b4b80 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -27,7 +27,7 @@ public:
             {10110, nullptr, "GetFriendProfileImage"},
             {10200, nullptr, "SendFriendRequestForApplication"},
             {10211, nullptr, "AddFacedFriendRequestForApplication"},
-            {10400, nullptr, "GetBlockedUserListIds"},
+            {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
             {10500, nullptr, "GetProfileList"},
             {10600, nullptr, "DeclareOpenOnlinePlaySession"},
             {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -121,6 +121,15 @@ private:
     };
     static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");
 
+    void GetBlockedUserListIds(Kernel::HLERequestContext& ctx) {
+        // This is safe to stub, as there should be no adverse consequences from reporting no
+        // blocked users.
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(0); // Indicates there are no blocked users
+    }
+
     void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
         // Stub used by Splatoon 2
         LOG_WARNING(Service_ACC, "(STUBBED) called");
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 32b6f4b27..f1e3d832a 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -28,6 +28,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
     buffer.slot = slot;
     buffer.igbp_buffer = igbp_buffer;
     buffer.status = Buffer::Status::Free;
+    free_buffers.push_back(slot);
 
     queue.emplace_back(buffer);
     buffer_wait_event.writable->Signal();
@@ -35,16 +36,37 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
 
 std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
                                                                                        u32 height) {
-    auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
-        // Only consider free buffers. Buffers become free once again after they've been Acquired
-        // and Released by the compositor, see the NVFlinger::Compose method.
-        if (buffer.status != Buffer::Status::Free) {
-            return false;
-        }
 
-        // Make sure that the parameters match.
-        return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
-    });
+    if (free_buffers.empty()) {
+        return {};
+    }
+
+    auto f_itr = free_buffers.begin();
+    auto itr = queue.end();
+
+    while (f_itr != free_buffers.end()) {
+        auto slot = *f_itr;
+        itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
+            // Only consider free buffers. Buffers become free once again after they've been
+            // Acquired and Released by the compositor, see the NVFlinger::Compose method.
+            if (buffer.status != Buffer::Status::Free) {
+                return false;
+            }
+
+            if (buffer.slot != slot) {
+                return false;
+            }
+
+            // Make sure that the parameters match.
+            return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
+        });
+
+        if (itr != queue.end()) {
+            free_buffers.erase(f_itr);
+            break;
+        }
+        ++f_itr;
+    }
 
     if (itr == queue.end()) {
         return {};
@@ -99,10 +121,18 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
     ASSERT(itr != queue.end());
     ASSERT(itr->status == Buffer::Status::Acquired);
     itr->status = Buffer::Status::Free;
+    free_buffers.push_back(slot);
 
     buffer_wait_event.writable->Signal();
 }
 
+void BufferQueue::Disconnect() {
+    queue.clear();
+    queue_sequence.clear();
+    id = 1;
+    layer_id = 1;
+}
+
 u32 BufferQueue::Query(QueryType type) {
     LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type));
 
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index f4bbfd945..d5f31e567 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -87,6 +87,7 @@ public:
                       Service::Nvidia::MultiFence& multi_fence);
     std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
     void ReleaseBuffer(u32 slot);
+    void Disconnect();
     u32 Query(QueryType type);
 
     u32 GetId() const {
@@ -101,6 +102,7 @@ private:
     u32 id;
     u64 layer_id;
 
+    std::list<u32> free_buffers;
     std::vector<Buffer> queue;
     std::list<u32> queue_sequence;
     Kernel::EventPair buffer_wait_event;
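
The free list above changes slot selection from "first free slot in queue order" to FIFO reuse of released slots; reduced to a standalone sketch (illustrative only, not code from this patch):

    #include <list>
    #include <optional>

    std::list<unsigned> free_slots;

    void OnRelease(unsigned slot) {
        free_slots.push_back(slot); // the slot released last is reused last
    }

    std::optional<unsigned> TryDequeue() {
        if (free_slots.empty())
            return std::nullopt; // nothing free: caller waits on the buffer event
        const unsigned slot = free_slots.front();
        free_slots.pop_front(); // the oldest released slot is handed out first
        return slot;
    }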
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 519da74e0..fdc62d05b 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -513,7 +513,8 @@ private:
 
         auto& buffer_queue = nv_flinger->FindBufferQueue(id);
 
-        if (transaction == TransactionId::Connect) {
+        switch (transaction) {
+        case TransactionId::Connect: {
             IGBPConnectRequestParcel request{ctx.ReadBuffer()};
             IGBPConnectResponseParcel response{
                 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
@@ -521,14 +522,18 @@ private:
                 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
                                  Settings::values.resolution_factor)};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::SetPreallocatedBuffer) {
+            break;
+        }
+        case TransactionId::SetPreallocatedBuffer: {
             IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
 
             buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
 
             IGBPSetPreallocatedBufferResponseParcel response{};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::DequeueBuffer) {
+            break;
+        }
+        case TransactionId::DequeueBuffer: {
             IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
             const u32 width{request.data.width};
             const u32 height{request.data.height};
@@ -556,14 +561,18 @@ private:
                 },
                 buffer_queue.GetWritableBufferWaitEvent());
             }
-        } else if (transaction == TransactionId::RequestBuffer) {
+            break;
+        }
+        case TransactionId::RequestBuffer: {
             IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
 
             auto& buffer = buffer_queue.RequestBuffer(request.slot);
 
             IGBPRequestBufferResponseParcel response{buffer};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::QueueBuffer) {
+            break;
+        }
+        case TransactionId::QueueBuffer: {
             IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
 
             buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
@@ -572,7 +581,9 @@ private:
 
             IGBPQueueBufferResponseParcel response{1280, 720};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::Query) {
+            break;
+        }
+        case TransactionId::Query: {
             IGBPQueryRequestParcel request{ctx.ReadBuffer()};
 
             const u32 value =
@@ -580,15 +591,30 @@ private:
 
             IGBPQueryResponseParcel response{value};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::CancelBuffer) {
+            break;
+        }
+        case TransactionId::CancelBuffer: {
             LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer");
-        } else if (transaction == TransactionId::Disconnect ||
-                   transaction == TransactionId::DetachBuffer) {
+            break;
+        }
+        case TransactionId::Disconnect: {
+            LOG_WARNING(Service_VI, "(STUBBED) called, transaction=Disconnect");
+            const auto buffer = ctx.ReadBuffer();
+
+            buffer_queue.Disconnect();
+
+            IGBPEmptyResponseParcel response{};
+            ctx.WriteBuffer(response.Serialize());
+            break;
+        }
+        case TransactionId::DetachBuffer: {
             const auto buffer = ctx.ReadBuffer();
 
             IGBPEmptyResponseParcel response{};
             ctx.WriteBuffer(response.Serialize());
-        } else {
+            break;
+        }
+        default:
             ASSERT_MSG(false, "Unimplemented");
         }
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f0888327f..6061d37ae 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -242,7 +242,52 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().FlushRegion(current_vaddr, copy_amount);
+                std::memcpy(dest_buffer, host_ptr, copy_amount);
+                break;
+            }
+            default:
+                UNREACHABLE();
+            }
+
+            page_index++;
+            page_offset = 0;
+            dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+            remaining_size -= copy_amount;
+        }
+    }
+
+    void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+                         const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+
+        std::size_t remaining_size = size;
+        std::size_t page_index = src_addr >> PAGE_BITS;
+        std::size_t page_offset = src_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, src_addr, size);
+                std::memset(dest_buffer, 0, copy_amount);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                const u8* const src_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_buffer, src_ptr, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
                 std::memcpy(dest_buffer, host_ptr, copy_amount);
                 break;
             }
@@ -261,6 +306,10 @@ struct Memory::Impl {
         ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
     }
 
+    void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+        ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
+    }
+
     void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
                     const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
@@ -290,7 +339,50 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
+                std::memcpy(host_ptr, src_buffer, copy_amount);
+                break;
+            }
+            default:
+                UNREACHABLE();
+            }
+
+            page_index++;
+            page_offset = 0;
+            src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+            remaining_size -= copy_amount;
+        }
+    }
+
+    void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
+                          const void* src_buffer, const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+        std::size_t remaining_size = size;
+        std::size_t page_index = dest_addr >> PAGE_BITS;
+        std::size_t page_offset = dest_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, dest_addr, size);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                u8* const dest_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_ptr, src_buffer, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
                 std::memcpy(host_ptr, src_buffer, copy_amount);
                 break;
             }
@@ -309,6 +401,10 @@ struct Memory::Impl {
         WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
     }
 
+    void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
+        WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
+    }
+
     void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
         std::size_t remaining_size = size;
313 const auto& page_table = process.VMManager().page_table; 409 const auto& page_table = process.VMManager().page_table;
314 std::size_t remaining_size = size; 410 std::size_t remaining_size = size;
@@ -337,7 +433,7 @@ struct Memory::Impl {
337 } 433 }
338 case Common::PageType::RasterizerCachedMemory: { 434 case Common::PageType::RasterizerCachedMemory: {
339 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr); 435 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
340 system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); 436 system.GPU().InvalidateRegion(current_vaddr, copy_amount);
341 std::memset(host_ptr, 0, copy_amount); 437 std::memset(host_ptr, 0, copy_amount);
342 break; 438 break;
343 } 439 }
@@ -384,7 +480,7 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().FlushRegion(current_vaddr, copy_amount);
                 WriteBlock(process, dest_addr, host_ptr, copy_amount);
                 break;
             }
@@ -545,7 +641,7 @@ struct Memory::Impl {
             break;
         case Common::PageType::RasterizerCachedMemory: {
             const u8* const host_ptr = GetPointerFromVMA(vaddr);
-            system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
+            system.GPU().FlushRegion(vaddr, sizeof(T));
             T value;
             std::memcpy(&value, host_ptr, sizeof(T));
             return value;
@@ -587,7 +683,7 @@ struct Memory::Impl {
             break;
         case Common::PageType::RasterizerCachedMemory: {
             u8* const host_ptr{GetPointerFromVMA(vaddr)};
-            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+            system.GPU().InvalidateRegion(vaddr, sizeof(T));
             std::memcpy(host_ptr, &data, sizeof(T));
             break;
         }
@@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
     impl->ReadBlock(src_addr, dest_buffer, size);
 }
 
+void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
+                             void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
+}
+
+void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
+}
+
 void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
                         std::size_t size) {
     impl->WriteBlock(process, dest_addr, src_buffer, size);
@@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
     impl->WriteBlock(dest_addr, src_buffer, size);
 }
 
+void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
+                              const void* src_buffer, std::size_t size) {
+    impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
+}
+
+void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
+                              const std::size_t size) {
+    impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
+}
+
 void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
     impl->ZeroBlock(process, dest_addr, size);
 }
diff --git a/src/core/memory.h b/src/core/memory.h
index 8913a9da4..b92d678a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -295,6 +295,27 @@ public:
                    std::size_t size);
 
     /**
+     * Reads a contiguous block of bytes from a specified process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param process     The process to read the data from.
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       process' address space.
+     */
+    void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
+                         std::size_t size);
+
+    /**
      * Reads a contiguous block of bytes from the current process' address space.
      *
     * @param src_addr    The virtual address to begin reading from.
@@ -313,6 +334,25 @@ public:
     void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
 
     /**
+     * Reads a contiguous block of bytes from the current process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       current process' address space.
+     */
+    void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
+
+    /**
      * Writes a range of bytes into a given process' address space at the specified
      * virtual address.
      *
@@ -336,6 +376,26 @@ public:
                     std::size_t size);
 
     /**
+     * Writes a range of bytes into a given process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU memory.
+     *
+     * @param process    The process to write data into the address space of.
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     */
+    void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
+                          std::size_t size);
+
+    /**
      * Writes a range of bytes into the current process' address space at the specified
      * virtual address.
      *
357 void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size); 417 void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
358 418
359 /** 419 /**
420 * Writes a range of bytes into the current process' address space at the specified
421 * virtual address.
422 * This unsafe version does not invalidate GPU Memory.
423 *
424 * @param dest_addr The destination virtual address to begin writing the data at.
425 * @param src_buffer The data to write into the current process' address space.
426 * @param size The size of the data to write, in bytes.
427 *
428 * @post The address range [dest_addr, size) in the current process' address space
429 * contains the data that was within src_buffer.
430 *
431 * @post If an attempt is made to write into an unmapped region of memory, the writes
432 * will be ignored and an error will be logged.
433 *
434 */
435 void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
436
437 /**
360 * Fills the specified address range within a process' address space with zeroes. 438 * Fills the specified address range within a process' address space with zeroes.
361 * 439 *
362 * @param process The process that will have a portion of its memory zeroed out. 440 * @param process The process that will have a portion of its memory zeroed out.
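
A caller-side sketch of the intended split between the safe and Unsafe block APIs (memory, addr, and size here are illustrative names): the Unsafe variants skip the GPU flush/invalidate, so they are only correct when the caller already guarantees coherency, as the GPU buffer cache later in this diff does.

    std::vector<u8> scratch(size);

    // Safe read: flushes any GPU-cached data covering [addr, addr + size) first.
    memory.ReadBlock(addr, scratch.data(), size);

    // Unsafe read: no GPU flush; valid only if the region is known not to be
    // dirty on the GPU side (or staleness is acceptable to the caller).
    memory.ReadBlockUnsafe(addr, scratch.data(), size);

    // Same pairing for writes: WriteBlock invalidates GPU caches over the
    // range, while WriteBlockUnsafe leaves them untouched.
    memory.WriteBlockUnsafe(addr, scratch.data(), size);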
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index effe76a63..258d58eba 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -148,6 +148,7 @@ add_library(video_core STATIC
     textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
+    textures/texture.cpp
     textures/texture.h
     video_core.cpp
     video_core.h
@@ -155,7 +156,6 @@ add_library(video_core STATIC
 
 if (ENABLE_VULKAN)
     target_sources(video_core PRIVATE
-        renderer_vulkan/declarations.h
         renderer_vulkan/fixed_pipeline_state.cpp
         renderer_vulkan/fixed_pipeline_state.h
         renderer_vulkan/maxwell_to_vk.cpp
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index 4b9193182..e35ee0b67 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,37 +15,29 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
-        return cache_addr <= other_start && other_end <= cache_addr_end;
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+        return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }
 
-    u8* GetWritableHostPtr() const {
-        return FromCacheAddr(cache_addr);
+    std::size_t GetOffset(const VAddr in_addr) {
+        return static_cast<std::size_t>(in_addr - cpu_addr);
     }
 
-    u8* GetWritableHostPtr(std::size_t offset) const {
-        return FromCacheAddr(cache_addr + offset);
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
     }
 
-    std::size_t GetOffset(const CacheAddr in_addr) {
-        return static_cast<std::size_t>(in_addr - cache_addr);
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
     }
 
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
-    }
-
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
-    }
-
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + size;
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + size;
     }
 
     std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
     }
 
 protected:
-    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
-        SetCacheAddr(cache_addr);
+    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
+        SetCpuAddr(cpu_addr);
     }
     ~BufferBlock() = default;
 
 private:
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
+    VAddr cpu_addr{};
+    VAddr cpu_addr_end{};
     std::size_t size{};
     u64 epoch{};
 };
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 186aca61d..b57c0d4d4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -19,6 +19,7 @@
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "core/core.h"
+#include "core/memory.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -37,28 +38,45 @@ public:
                                                bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};
 
-        auto& memory_manager = system.GPU().MemoryManager();
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
+        const std::optional<VAddr> cpu_addr_opt =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+
+        if (!cpu_addr_opt) {
             return {GetEmptyBuffer(size), 0};
         }
-        const auto cache_addr = ToCacheAddr(host_ptr);
+
+        VAddr cpu_addr = *cpu_addr_opt;
 
         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
+            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
+                auto& memory_manager = system.GPU().MemoryManager();
                 if (use_fast_cbuf) {
-                    return ConstBufferUpload(host_ptr, size);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return ConstBufferUpload(host_ptr, size);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return ConstBufferUpload(staging_buffer.data(), size);
+                    }
                 } else {
-                    return StreamBufferUpload(host_ptr, size, alignment);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return StreamBufferUpload(host_ptr, size, alignment);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return StreamBufferUpload(staging_buffer.data(), size, alignment);
+                    }
                 }
             }
         }
 
-        auto block = GetBlock(cache_addr, size);
-        auto map = MapAddress(block, gpu_addr, cache_addr, size);
+        auto block = GetBlock(cpu_addr, size);
+        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
             if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
71 } 89 }
72 } 90 }
73 91
74 const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); 92 const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
75 93
76 return {ToHandle(block), offset}; 94 return {ToHandle(block), offset};
77 } 95 }
@@ -112,7 +130,7 @@ public:
112 } 130 }
113 131
114 /// Write any cached resources overlapping the specified region back to memory 132 /// Write any cached resources overlapping the specified region back to memory
115 void FlushRegion(CacheAddr addr, std::size_t size) { 133 void FlushRegion(VAddr addr, std::size_t size) {
116 std::lock_guard lock{mutex}; 134 std::lock_guard lock{mutex};
117 135
118 std::vector<MapInterval> objects = GetMapsInRange(addr, size); 136 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
127 } 145 }
128 146
129 /// Mark the specified region as being invalidated 147 /// Mark the specified region as being invalidated
130 void InvalidateRegion(CacheAddr addr, u64 size) { 148 void InvalidateRegion(VAddr addr, u64 size) {
131 std::lock_guard lock{mutex}; 149 std::lock_guard lock{mutex};
132 150
133 std::vector<MapInterval> objects = GetMapsInRange(addr, size); 151 std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -152,7 +170,7 @@ protected:
152 170
153 virtual void WriteBarrier() = 0; 171 virtual void WriteBarrier() = 0;
154 172
155 virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; 173 virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
156 174
157 virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, 175 virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
158 const u8* data) = 0; 176 const u8* data) = 0;
@@ -169,20 +187,17 @@ protected:
169 187
170 /// Register an object into the cache 188 /// Register an object into the cache
171 void Register(const MapInterval& new_map, bool inherit_written = false) { 189 void Register(const MapInterval& new_map, bool inherit_written = false) {
172 const CacheAddr cache_ptr = new_map->GetStart(); 190 const VAddr cpu_addr = new_map->GetStart();
173 const std::optional<VAddr> cpu_addr = 191 if (!cpu_addr) {
174 system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
175 if (!cache_ptr || !cpu_addr) {
176 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", 192 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
177 new_map->GetGpuAddress()); 193 new_map->GetGpuAddress());
178 return; 194 return;
179 } 195 }
180 const std::size_t size = new_map->GetEnd() - new_map->GetStart(); 196 const std::size_t size = new_map->GetEnd() - new_map->GetStart();
181 new_map->SetCpuAddress(*cpu_addr);
182 new_map->MarkAsRegistered(true); 197 new_map->MarkAsRegistered(true);
183 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; 198 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
184 mapped_addresses.insert({interval, new_map}); 199 mapped_addresses.insert({interval, new_map});
185 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); 200 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
186 if (inherit_written) { 201 if (inherit_written) {
187 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); 202 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
188 new_map->MarkAsWritten(true); 203 new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
192 /// Unregisters an object from the cache 207 /// Unregisters an object from the cache
193 void Unregister(MapInterval& map) { 208 void Unregister(MapInterval& map) {
194 const std::size_t size = map->GetEnd() - map->GetStart(); 209 const std::size_t size = map->GetEnd() - map->GetStart();
195 rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); 210 rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
196 map->MarkAsRegistered(false); 211 map->MarkAsRegistered(false);
197 if (map->IsWritten()) { 212 if (map->IsWritten()) {
198 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 213 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -202,32 +217,39 @@ protected:
202 } 217 }
203 218
204private: 219private:
205 MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { 220 MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
206 return std::make_shared<MapIntervalBase>(start, end, gpu_addr); 221 return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
207 } 222 }
208 223
209 MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, 224 MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
210 const CacheAddr cache_addr, const std::size_t size) { 225 const std::size_t size) {
211 226
212 std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); 227 std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
213 if (overlaps.empty()) { 228 if (overlaps.empty()) {
214 const CacheAddr cache_addr_end = cache_addr + size; 229 auto& memory_manager = system.GPU().MemoryManager();
215 MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); 230 const VAddr cpu_addr_end = cpu_addr + size;
216 u8* host_ptr = FromCacheAddr(cache_addr); 231 MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
217 UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); 232 if (memory_manager.IsGranularRange(gpu_addr, size)) {
233 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
234 UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
235 } else {
236 staging_buffer.resize(size);
237 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
238 UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
239 }
218 Register(new_map); 240 Register(new_map);
219 return new_map; 241 return new_map;
220 } 242 }
221 243
222 const CacheAddr cache_addr_end = cache_addr + size; 244 const VAddr cpu_addr_end = cpu_addr + size;
223 if (overlaps.size() == 1) { 245 if (overlaps.size() == 1) {
224 MapInterval& current_map = overlaps[0]; 246 MapInterval& current_map = overlaps[0];
225 if (current_map->IsInside(cache_addr, cache_addr_end)) { 247 if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
226 return current_map; 248 return current_map;
227 } 249 }
228 } 250 }
229 CacheAddr new_start = cache_addr; 251 VAddr new_start = cpu_addr;
230 CacheAddr new_end = cache_addr_end; 252 VAddr new_end = cpu_addr_end;
231 bool write_inheritance = false; 253 bool write_inheritance = false;
232 bool modified_inheritance = false; 254 bool modified_inheritance = false;
233 // Calculate new buffer parameters 255 // Calculate new buffer parameters
@@ -237,7 +259,7 @@ private:
237 write_inheritance |= overlap->IsWritten(); 259 write_inheritance |= overlap->IsWritten();
238 modified_inheritance |= overlap->IsModified(); 260 modified_inheritance |= overlap->IsModified();
239 } 261 }
240 GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; 262 GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
241 for (auto& overlap : overlaps) { 263 for (auto& overlap : overlaps) {
242 Unregister(overlap); 264 Unregister(overlap);
243 } 265 }
@@ -250,7 +272,7 @@ private:
250 return new_map; 272 return new_map;
251 } 273 }
252 274
253 void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, 275 void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
254 std::vector<MapInterval>& overlaps) { 276 std::vector<MapInterval>& overlaps) {
255 const IntervalType base_interval{start, end}; 277 const IntervalType base_interval{start, end};
256 IntervalSet interval_set{}; 278 IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
262 for (auto& interval : interval_set) { 284 for (auto& interval : interval_set) {
263 std::size_t size = interval.upper() - interval.lower(); 285 std::size_t size = interval.upper() - interval.lower();
264 if (size > 0) { 286 if (size > 0) {
265 u8* host_ptr = FromCacheAddr(interval.lower()); 287 staging_buffer.resize(size);
266 UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); 288 system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
289 UploadBlockData(block, block->GetOffset(interval.lower()), size,
290 staging_buffer.data());
267 } 291 }
268 } 292 }
269 } 293 }
270 294
271 std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { 295 std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
272 if (size == 0) { 296 if (size == 0) {
273 return {}; 297 return {};
274 } 298 }
@@ -290,8 +314,9 @@ private:
290 void FlushMap(MapInterval map) { 314 void FlushMap(MapInterval map) {
291 std::size_t size = map->GetEnd() - map->GetStart(); 315 std::size_t size = map->GetEnd() - map->GetStart();
292 TBuffer block = blocks[map->GetStart() >> block_page_bits]; 316 TBuffer block = blocks[map->GetStart() >> block_page_bits];
293 u8* host_ptr = FromCacheAddr(map->GetStart()); 317 staging_buffer.resize(size);
294 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); 318 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
319 system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
295 map->MarkAsModified(false, 0); 320 map->MarkAsModified(false, 0);
296 } 321 }
297 322
@@ -316,14 +341,14 @@ private:
316 TBuffer EnlargeBlock(TBuffer buffer) { 341 TBuffer EnlargeBlock(TBuffer buffer) {
317 const std::size_t old_size = buffer->GetSize(); 342 const std::size_t old_size = buffer->GetSize();
318 const std::size_t new_size = old_size + block_page_size; 343 const std::size_t new_size = old_size + block_page_size;
319 const CacheAddr cache_addr = buffer->GetCacheAddr(); 344 const VAddr cpu_addr = buffer->GetCpuAddr();
320 TBuffer new_buffer = CreateBlock(cache_addr, new_size); 345 TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
321 CopyBlock(buffer, new_buffer, 0, 0, old_size); 346 CopyBlock(buffer, new_buffer, 0, 0, old_size);
322 buffer->SetEpoch(epoch); 347 buffer->SetEpoch(epoch);
323 pending_destruction.push_back(buffer); 348 pending_destruction.push_back(buffer);
324 const CacheAddr cache_addr_end = cache_addr + new_size - 1; 349 const VAddr cpu_addr_end = cpu_addr + new_size - 1;
325 u64 page_start = cache_addr >> block_page_bits; 350 u64 page_start = cpu_addr >> block_page_bits;
326 const u64 page_end = cache_addr_end >> block_page_bits; 351 const u64 page_end = cpu_addr_end >> block_page_bits;
327 while (page_start <= page_end) { 352 while (page_start <= page_end) {
328 blocks[page_start] = new_buffer; 353 blocks[page_start] = new_buffer;
329 ++page_start; 354 ++page_start;
@@ -334,9 +359,9 @@ private:
334 TBuffer MergeBlocks(TBuffer first, TBuffer second) { 359 TBuffer MergeBlocks(TBuffer first, TBuffer second) {
335 const std::size_t size_1 = first->GetSize(); 360 const std::size_t size_1 = first->GetSize();
336 const std::size_t size_2 = second->GetSize(); 361 const std::size_t size_2 = second->GetSize();
337 const CacheAddr first_addr = first->GetCacheAddr(); 362 const VAddr first_addr = first->GetCpuAddr();
338 const CacheAddr second_addr = second->GetCacheAddr(); 363 const VAddr second_addr = second->GetCpuAddr();
339 const CacheAddr new_addr = std::min(first_addr, second_addr); 364 const VAddr new_addr = std::min(first_addr, second_addr);
340 const std::size_t new_size = size_1 + size_2; 365 const std::size_t new_size = size_1 + size_2;
341 TBuffer new_buffer = CreateBlock(new_addr, new_size); 366 TBuffer new_buffer = CreateBlock(new_addr, new_size);
342 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); 367 CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
345 second->SetEpoch(epoch); 370 second->SetEpoch(epoch);
346 pending_destruction.push_back(first); 371 pending_destruction.push_back(first);
347 pending_destruction.push_back(second); 372 pending_destruction.push_back(second);
348 const CacheAddr cache_addr_end = new_addr + new_size - 1; 373 const VAddr cpu_addr_end = new_addr + new_size - 1;
349 u64 page_start = new_addr >> block_page_bits; 374 u64 page_start = new_addr >> block_page_bits;
350 const u64 page_end = cache_addr_end >> block_page_bits; 375 const u64 page_end = cpu_addr_end >> block_page_bits;
351 while (page_start <= page_end) { 376 while (page_start <= page_end) {
352 blocks[page_start] = new_buffer; 377 blocks[page_start] = new_buffer;
353 ++page_start; 378 ++page_start;
@@ -355,18 +380,18 @@ private:
355 return new_buffer; 380 return new_buffer;
356 } 381 }
357 382
358 TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { 383 TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
359 TBuffer found{}; 384 TBuffer found{};
360 const CacheAddr cache_addr_end = cache_addr + size - 1; 385 const VAddr cpu_addr_end = cpu_addr + size - 1;
361 u64 page_start = cache_addr >> block_page_bits; 386 u64 page_start = cpu_addr >> block_page_bits;
362 const u64 page_end = cache_addr_end >> block_page_bits; 387 const u64 page_end = cpu_addr_end >> block_page_bits;
363 while (page_start <= page_end) { 388 while (page_start <= page_end) {
364 auto it = blocks.find(page_start); 389 auto it = blocks.find(page_start);
365 if (it == blocks.end()) { 390 if (it == blocks.end()) {
366 if (found) { 391 if (found) {
367 found = EnlargeBlock(found); 392 found = EnlargeBlock(found);
368 } else { 393 } else {
369 const CacheAddr start_addr = (page_start << block_page_bits); 394 const VAddr start_addr = (page_start << block_page_bits);
370 found = CreateBlock(start_addr, block_page_size); 395 found = CreateBlock(start_addr, block_page_size);
371 blocks[page_start] = found; 396 blocks[page_start] = found;
372 } 397 }
@@ -386,7 +411,7 @@ private:
386 return found; 411 return found;
387 } 412 }
388 413
389 void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { 414 void MarkRegionAsWritten(const VAddr start, const VAddr end) {
390 u64 page_start = start >> write_page_bit; 415 u64 page_start = start >> write_page_bit;
391 const u64 page_end = end >> write_page_bit; 416 const u64 page_end = end >> write_page_bit;
392 while (page_start <= page_end) { 417 while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
400 } 425 }
401 } 426 }
402 427
403 void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { 428 void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
404 u64 page_start = start >> write_page_bit; 429 u64 page_start = start >> write_page_bit;
405 const u64 page_end = end >> write_page_bit; 430 const u64 page_end = end >> write_page_bit;
406 while (page_start <= page_end) { 431 while (page_start <= page_end) {
@@ -416,7 +441,7 @@ private:
416 } 441 }
417 } 442 }
418 443
419 bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { 444 bool IsRegionWritten(const VAddr start, const VAddr end) const {
420 u64 page_start = start >> write_page_bit; 445 u64 page_start = start >> write_page_bit;
421 const u64 page_end = end >> write_page_bit; 446 const u64 page_end = end >> write_page_bit;
422 while (page_start <= page_end) { 447 while (page_start <= page_end) {
@@ -440,8 +465,8 @@ private:
440 u64 buffer_offset = 0; 465 u64 buffer_offset = 0;
441 u64 buffer_offset_base = 0; 466 u64 buffer_offset_base = 0;
442 467
443 using IntervalSet = boost::icl::interval_set<CacheAddr>; 468 using IntervalSet = boost::icl::interval_set<VAddr>;
444 using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; 469 using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
445 using IntervalType = typename IntervalCache::interval_type; 470 using IntervalType = typename IntervalCache::interval_type;
446 IntervalCache mapped_addresses; 471 IntervalCache mapped_addresses;
447 472
@@ -456,6 +481,8 @@ private:
456 u64 epoch = 0; 481 u64 epoch = 0;
457 u64 modified_ticks = 0; 482 u64 modified_ticks = 0;
458 483
484 std::vector<u8> staging_buffer;
485
459 std::recursive_mutex mutex; 486 std::recursive_mutex mutex;
460}; 487};
461 488
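Note on the buffer_cache.h hunks above: every upload now branches on MemoryManager::IsGranularRange. When the GPU range maps to contiguous host memory, the cache uploads straight from GetPointer; otherwise it first gathers the range into the reusable staging_buffer with ReadBlockUnsafe. A generic sketch of that fast-path/slow-path split, assuming only the three MemoryManager calls the patch itself uses:

#include <cstddef>
#include <cstdint>
#include <vector>

using GPUVAddr = std::uint64_t;

// MemoryManagerLike stands in for Tegra::MemoryManager; UploadFn is whatever
// consumes the host pointer (e.g. UploadBlockData or StreamBufferUpload).
template <typename MemoryManagerLike, typename UploadFn>
void UploadThroughStaging(MemoryManagerLike& mm, GPUVAddr gpu_addr, std::size_t size,
                          std::vector<std::uint8_t>& staging, UploadFn&& upload) {
    if (mm.IsGranularRange(gpu_addr, size)) {
        // Fast path: the whole range is contiguous in host memory.
        upload(mm.GetPointer(gpu_addr), size);
    } else {
        // Slow path: gather scattered pages into the reusable staging buffer.
        staging.resize(size);
        mm.ReadBlockUnsafe(gpu_addr, staging.data(), size);
        upload(staging.data(), size);
    }
}

Keeping one staging_buffer per cache instead of a local vector avoids reallocating on every non-granular upload; the cache's mutex already serializes access to it.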
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 3a104d5cd..b0956029d 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -11,7 +11,7 @@ namespace VideoCommon {
 
 class MapIntervalBase {
 public:
-    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
+    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
         : start{start}, end{end}, gpu_addr{gpu_addr} {}
 
     void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
         return gpu_addr;
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
         return (start <= other_start && other_end <= end);
     }
 
@@ -46,11 +46,11 @@ public:
         return is_registered;
     }
 
-    CacheAddr GetStart() const {
+    VAddr GetStart() const {
         return start;
     }
 
-    CacheAddr GetEnd() const {
+    VAddr GetEnd() const {
         return end;
     }
 
@@ -76,8 +76,8 @@ public:
     }
 
 private:
-    CacheAddr start;
-    CacheAddr end;
+    VAddr start;
+    VAddr end;
     GPUVAddr gpu_addr;
     VAddr cpu_addr{};
     bool is_written{};
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index d24c9f657..5cf6a4cc3 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -303,6 +303,10 @@ public:
             return (type == Type::SignedNorm) || (type == Type::UnsignedNorm);
         }
 
+        bool IsConstant() const {
+            return constant;
+        }
+
         bool IsValid() const {
             return size != Size::Invalid;
         }
@@ -312,6 +316,35 @@ public:
         }
     };
 
+    struct MsaaSampleLocation {
+        union {
+            BitField<0, 4, u32> x0;
+            BitField<4, 4, u32> y0;
+            BitField<8, 4, u32> x1;
+            BitField<12, 4, u32> y1;
+            BitField<16, 4, u32> x2;
+            BitField<20, 4, u32> y2;
+            BitField<24, 4, u32> x3;
+            BitField<28, 4, u32> y3;
+        };
+
+        constexpr std::pair<u32, u32> Location(int index) const {
+            switch (index) {
+            case 0:
+                return {x0, y0};
+            case 1:
+                return {x1, y1};
+            case 2:
+                return {x2, y2};
+            case 3:
+                return {x3, y3};
+            default:
+                UNREACHABLE();
+                return {0, 0};
+            }
+        }
+    };
+
     enum class DepthMode : u32 {
         MinusOneToOne = 0,
         ZeroToOne = 1,
@@ -793,7 +826,13 @@ public:
 
             u32 rt_separate_frag_data;
 
-            INSERT_UNION_PADDING_WORDS(0xC);
+            INSERT_UNION_PADDING_WORDS(0x1);
+
+            u32 multisample_raster_enable;
+            u32 multisample_raster_samples;
+            std::array<u32, 4> multisample_sample_mask;
+
+            INSERT_UNION_PADDING_WORDS(0x5);
 
             struct {
                 u32 address_high;
@@ -830,7 +869,16 @@ public:
 
             std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
 
-            INSERT_UNION_PADDING_WORDS(0xF);
+            std::array<MsaaSampleLocation, 4> multisample_sample_locations;
+
+            INSERT_UNION_PADDING_WORDS(0x2);
+
+            union {
+                BitField<0, 1, u32> enable;
+                BitField<4, 3, u32> target;
+            } multisample_coverage_to_color;
+
+            INSERT_UNION_PADDING_WORDS(0x8);
 
             struct {
                 union {
@@ -922,7 +970,10 @@ public:
                 BitField<4, 1, u32> triangle_rast_flip;
             } screen_y_control;
 
-            INSERT_UNION_PADDING_WORDS(0x21);
+            float line_width_smooth;
+            float line_width_aliased;
+
+            INSERT_UNION_PADDING_WORDS(0x1F);
 
             u32 vb_element_base;
             u32 vb_base_instance;
@@ -943,7 +994,7 @@ public:
 
             CounterReset counter_reset;
 
-            INSERT_UNION_PADDING_WORDS(0x1);
+            u32 multisample_enable;
 
             u32 zeta_enable;
 
@@ -980,7 +1031,7 @@ public:
 
             float polygon_offset_factor;
 
-            INSERT_UNION_PADDING_WORDS(0x1);
+            u32 line_smooth_enable;
 
             struct {
                 u32 tic_address_high;
@@ -1007,7 +1058,11 @@ public:
 
             float polygon_offset_units;
 
-            INSERT_UNION_PADDING_WORDS(0x11);
+            INSERT_UNION_PADDING_WORDS(0x4);
+
+            Tegra::Texture::MsaaMode multisample_mode;
+
+            INSERT_UNION_PADDING_WORDS(0xC);
 
             union {
                 BitField<2, 1, u32> coord_origin;
@@ -1507,12 +1562,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
 ASSERT_REG_POSITION(color_mask_common, 0x3E4);
-ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(depth_bounds, 0x3E7);
+ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
+ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
+ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
+ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(clear_flags, 0x43E);
 ASSERT_REG_POSITION(fill_rectangle, 0x44F);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
+ASSERT_REG_POSITION(multisample_sample_locations, 0x478);
+ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
 ASSERT_REG_POSITION(rt_control, 0x487);
 ASSERT_REG_POSITION(zeta_width, 0x48a);
 ASSERT_REG_POSITION(zeta_height, 0x48b);
@@ -1538,6 +1598,8 @@ ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6);
 ASSERT_REG_POSITION(stencil_front_mask, 0x4E7);
 ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
 ASSERT_REG_POSITION(screen_y_control, 0x4EB);
+ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
+ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
 ASSERT_REG_POSITION(vb_element_base, 0x50D);
 ASSERT_REG_POSITION(vb_base_instance, 0x50E);
 ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
@@ -1545,11 +1607,13 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545);
 ASSERT_REG_POSITION(point_size, 0x546);
 ASSERT_REG_POSITION(point_sprite_enable, 0x548);
 ASSERT_REG_POSITION(counter_reset, 0x54C);
+ASSERT_REG_POSITION(multisample_enable, 0x54D);
 ASSERT_REG_POSITION(zeta_enable, 0x54E);
 ASSERT_REG_POSITION(multisample_control, 0x54F);
 ASSERT_REG_POSITION(condition, 0x554);
 ASSERT_REG_POSITION(tsc, 0x557);
-ASSERT_REG_POSITION(polygon_offset_factor, 0x55b);
+ASSERT_REG_POSITION(polygon_offset_factor, 0x55B);
+ASSERT_REG_POSITION(line_smooth_enable, 0x55C);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(stencil_two_side_enable, 0x565);
 ASSERT_REG_POSITION(stencil_back_op_fail, 0x566);
@@ -1558,6 +1622,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568);
 ASSERT_REG_POSITION(stencil_back_func_func, 0x569);
 ASSERT_REG_POSITION(framebuffer_srgb, 0x56E);
 ASSERT_REG_POSITION(polygon_offset_units, 0x56F);
+ASSERT_REG_POSITION(multisample_mode, 0x574);
 ASSERT_REG_POSITION(point_coord_replace, 0x581);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
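Note on the maxwell_3d.h hunks above: each MsaaSampleLocation word packs four (x, y) sample positions as 4-bit fields, which is exactly what the BitField layout and Location() express. A standalone sketch of the same decode, useful for checking the field offsets by hand; DecodeSample is illustrative only:

#include <cstdint>
#include <utility>

// Mirrors the BitField layout: sample i occupies bits [8 * i, 8 * i + 8),
// x in the low nibble and y in the high nibble of that byte.
constexpr std::pair<std::uint32_t, std::uint32_t> DecodeSample(std::uint32_t raw, int index) {
    const std::uint32_t x = (raw >> (index * 8)) & 0xF;
    const std::uint32_t y = (raw >> (index * 8 + 4)) & 0xF;
    return {x, y};
}

// Sample 0 of 0xC8 decodes to x = 8, y = 12, matching x0/y0 above.
static_assert(DecodeSample(0xC8, 0) == std::pair<std::uint32_t, std::uint32_t>(8, 12));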
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index a31947ef3..5e9cfba22 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -290,6 +290,23 @@ enum class VmadShr : u64 {
     Shr15 = 2,
 };
 
+enum class VmnmxType : u64 {
+    Bits8,
+    Bits16,
+    Bits32,
+};
+
+enum class VmnmxOperation : u64 {
+    Mrg_16H = 0,
+    Mrg_16L = 1,
+    Mrg_8B0 = 2,
+    Mrg_8B2 = 3,
+    Acc = 4,
+    Min = 5,
+    Max = 6,
+    Nop = 7,
+};
+
 enum class XmadMode : u64 {
     None = 0,
     CLo = 1,
@@ -1657,6 +1674,42 @@ union Instruction {
     } vmad;
 
     union {
+        BitField<54, 1, u64> is_dest_signed;
+        BitField<48, 1, u64> is_src_a_signed;
+        BitField<49, 1, u64> is_src_b_signed;
+        BitField<37, 2, u64> src_format_a;
+        BitField<29, 2, u64> src_format_b;
+        BitField<56, 1, u64> mx;
+        BitField<55, 1, u64> sat;
+        BitField<36, 2, u64> selector_a;
+        BitField<28, 2, u64> selector_b;
+        BitField<50, 1, u64> is_op_b_register;
+        BitField<51, 3, VmnmxOperation> operation;
+
+        VmnmxType SourceFormatA() const {
+            switch (src_format_a) {
+            case 0b11:
+                return VmnmxType::Bits32;
+            case 0b10:
+                return VmnmxType::Bits16;
+            default:
+                return VmnmxType::Bits8;
+            }
+        }
+
+        VmnmxType SourceFormatB() const {
+            switch (src_format_b) {
+            case 0b11:
+                return VmnmxType::Bits32;
+            case 0b10:
+                return VmnmxType::Bits16;
+            default:
+                return VmnmxType::Bits8;
+            }
+        }
+    } vmnmx;
+
+    union {
         BitField<20, 16, u64> imm20_16;
         BitField<35, 1, u64> high_b_rr; // used on RR
         BitField<36, 1, u64> product_shift_left;
@@ -1718,6 +1771,7 @@ public:
         BRK,
         DEPBAR,
         VOTE,
+        VOTE_VTG,
         SHFL,
         FSWZADD,
         BFE_C,
@@ -1765,9 +1819,11 @@ public:
         IPA,
         OUT_R, // Emit vertex/primitive
         ISBERD,
+        BAR,
         MEMBAR,
         VMAD,
         VSETP,
+        VMNMX,
         FFMA_IMM, // Fused Multiply and Add
         FFMA_CR,
         FFMA_RC,
@@ -1822,7 +1878,8 @@ public:
         ICMP_R,
         ICMP_CR,
         ICMP_IMM,
-        FCMP_R,
+        FCMP_RR,
+        FCMP_RC,
         MUFU,  // Multi-Function Operator
         RRO_C, // Range Reduction Operator
         RRO_R,
@@ -1849,7 +1906,7 @@ public:
         MOV_C,
         MOV_R,
         MOV_IMM,
-        MOV_SYS,
+        S2R,
         MOV32_IMM,
         SHL_C,
         SHL_R,
@@ -2033,6 +2090,7 @@ private:
             INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
             INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
            INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
+            INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"),
             INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
             INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
@@ -2071,9 +2129,11 @@ private:
             INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
             INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
             INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+            INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"),
             INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"),
             INST("01011111--------", Id::VMAD, Type::Video, "VMAD"),
             INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"),
+            INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"),
             INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
             INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
             INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
@@ -2128,7 +2188,8 @@ private:
             INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
             INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
             INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
-            INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"),
+            INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
+            INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -2142,7 +2203,7 @@ private:
             INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
             INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
             INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
-            INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"),
+            INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"),
             INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
             INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
             INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
@@ -2174,7 +2235,7 @@ private:
             INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"),
             INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"),
             INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"),
-            INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
+            INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"),
             INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"),
             INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"),
             INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index bc80661d8..72e2a33d5 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -4,6 +4,9 @@
 
 #pragma once
 
+#include <array>
+#include <optional>
+
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -16,7 +19,7 @@ enum class OutputTopology : u32 {
     TriangleStrip = 7,
 };
 
-enum class AttributeUse : u8 {
+enum class PixelImap : u8 {
     Unused = 0,
     Constant = 1,
     Perspective = 2,
@@ -24,7 +27,7 @@ enum class AttributeUse : u8 {
 };
 
 // Documentation in:
-// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
+// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html
 struct Header {
     union {
         BitField<0, 5, u32> sph_type;
@@ -59,8 +62,8 @@ struct Header {
     union {
         BitField<0, 12, u32> max_output_vertices;
         BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
-        BitField<24, 4, u32> reserved;
-        BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+        BitField<20, 4, u32> reserved;
+        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
     } common4{};
 
     union {
@@ -93,17 +96,20 @@ struct Header {
         struct {
             INSERT_UNION_PADDING_BYTES(3);  // ImapSystemValuesA
             INSERT_UNION_PADDING_BYTES(1);  // ImapSystemValuesB
+
             union {
-                BitField<0, 2, AttributeUse> x;
-                BitField<2, 2, AttributeUse> y;
-                BitField<4, 2, AttributeUse> w;
-                BitField<6, 2, AttributeUse> z;
+                BitField<0, 2, PixelImap> x;
+                BitField<2, 2, PixelImap> y;
+                BitField<4, 2, PixelImap> z;
+                BitField<6, 2, PixelImap> w;
                 u8 raw;
             } imap_generic_vector[32];
+
             INSERT_UNION_PADDING_BYTES(2);  // ImapColor
             INSERT_UNION_PADDING_BYTES(2);  // ImapSystemValuesC
             INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
             INSERT_UNION_PADDING_BYTES(2);  // ImapReserved
+
             struct {
                 u32 target;
                 union {
@@ -112,31 +118,30 @@ struct Header {
                     BitField<2, 30, u32> reserved;
                 };
             } omap;
+
             bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
                 const u32 bit = render_target * 4 + component;
                 return omap.target & (1 << bit);
             }
-            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
-                return static_cast<AttributeUse>(
-                    (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
-            }
-            AttributeUse GetAttributeUse(u32 attribute) const {
-                AttributeUse result = AttributeUse::Unused;
-                for (u32 i = 0; i < 4; i++) {
-                    const auto index = GetAttributeIndexUse(attribute, i);
-                    if (index == AttributeUse::Unused) {
-                        continue;
-                    }
-                    if (result == AttributeUse::Unused || result == index) {
-                        result = index;
+
+            PixelImap GetPixelImap(u32 attribute) const {
+                const auto get_index = [this, attribute](u32 index) {
+                    return static_cast<PixelImap>(
+                        (imap_generic_vector[attribute].raw >> (index * 2)) & 3);
+                };
+
+                std::optional<PixelImap> result;
+                for (u32 component = 0; component < 4; ++component) {
+                    const PixelImap index = get_index(component);
+                    if (index == PixelImap::Unused) {
                         continue;
                     }
-                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
-                    if (index == AttributeUse::Perspective) {
-                        result = index;
+                    if (result && result != index) {
+                        LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode");
                     }
+                    result = index;
                 }
-                return result;
+                return result.value_or(PixelImap::Unused);
             }
         } ps;
 
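Note on the shader_header.h hunks above: the old GetAttributeIndexUse/GetAttributeUse pair collapses into a single GetPixelImap that folds the four per-component interpolation modes, keeping the last non-Unused mode and logging when two components disagree. The same fold in isolation; Resolve is an illustrative stand-in for GetPixelImap:

#include <array>
#include <cstdio>
#include <optional>

enum class PixelImap { Unused, Constant, Perspective, ScreenLinear };

// Fold four per-component modes into one, reporting conflicts once seen.
PixelImap Resolve(const std::array<PixelImap, 4>& components) {
    std::optional<PixelImap> result;
    for (const PixelImap mode : components) {
        if (mode == PixelImap::Unused) {
            continue;
        }
        if (result && *result != mode) {
            std::fprintf(stderr, "generic attribute conflict in interpolation mode\n");
        }
        result = mode;
    }
    return result.value_or(PixelImap::Unused);
}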
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ced9d7e28..1a2d747be 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -270,13 +270,13 @@ public:
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 
 protected:
     virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 925be8d7b..20e73a37e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -12,8 +12,9 @@ namespace VideoCommon {
 
 GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
                      std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
-    : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)),
-      cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {}
+    : GPU(system, std::move(renderer_), true), gpu_thread{system},
+      cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
+      gpu_context(std::move(context)) {}
 
 GPUAsynch::~GPUAsynch() = default;
 
@@ -30,15 +31,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     gpu_thread.SwapBuffers(framebuffer);
 }
 
-void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
     gpu_thread.FlushRegion(addr, size);
 }
 
-void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.InvalidateRegion(addr, size);
 }
 
-void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 265c62758..03fd0eef0 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -27,9 +27,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override;
 
 protected:
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index bd5278a5c..6f38a672a 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     renderer->SwapBuffers(framebuffer);
 }
 
-void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushRegion(addr, size);
 }
 
-void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().InvalidateRegion(addr, size);
 }
 
-void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 866a94c8c..4a6e9a01d 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -26,9 +26,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override {}
 
 protected:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 270c7ae0d..10cda686b 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 }
 
-void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
     PushCommand(FlushRegionCommand(addr, size));
 }
 
-void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
     system.Renderer().Rasterizer().InvalidateRegion(addr, size);
 }
 
-void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
     InvalidateRegion(addr, size);
 }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index be36c580e..cd74ad330 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -47,26 +47,26 @@ struct SwapBuffersCommand final {
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
         : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
@@ -111,13 +111,13 @@ public:
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(CacheAddr addr, u64 size);
+    void FlushRegion(VAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size);
+    void InvalidateRegion(VAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
 
     // Wait until the gpu thread is idle.
     void WaitIdle() const;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f5d33f27a..a3389d0d2 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
     ASSERT((gpu_addr & page_mask) == 0);
 
     const u64 aligned_size{Common::AlignUp(size, page_size)};
-    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
     const auto cpu_addr = GpuToCpuAddress(gpu_addr);
     ASSERT(cpu_addr);
 
     // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
+    system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
 
     UnmapRange(gpu_addr, aligned_size);
     ASSERT(system.CurrentProcess()
@@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
         return {};
     }
 
-    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    const u8* page_pointer{GetPointer(addr)};
     if (page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
         T value;
-        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+        std::memcpy(&value, page_pointer, sizeof(T));
         return value;
     }
 
@@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
         return;
     }
 
-    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    u8* page_pointer{GetPointer(addr)};
     if (page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
-        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+        std::memcpy(page_pointer, &data, sizeof(T));
         return;
     }
 
@@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
         return {};
     }
 
-    u8* const page_pointer{page_table.pointers[addr >> page_bits]};
-    if (page_pointer != nullptr) {
-        return page_pointer + (addr & page_mask);
+    auto& memory = system.Memory();
+
+    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
+
+    if (page_addr != 0) {
+        return memory.GetPointer(page_addr + (addr & page_mask));
     }
 
     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
         return {};
     }
 
-    const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
-    if (page_pointer != nullptr) {
-        return page_pointer + (addr & page_mask);
+    const auto& memory = system.Memory();
+
+    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
+
+    if (page_addr != 0) {
+        return memory.GetPointer(page_addr + (addr & page_mask));
     }
 
     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 
         switch (page_table.attributes[page_index]) {
         case Common::PageType::Memory: {
-            const u8* src_ptr{page_table.pointers[page_index] + page_offset};
+            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
             // Flush must happen on the rasterizer interface, such that memory is always synchronous
             // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
-            rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
-            std::memcpy(dest_buffer, src_ptr, copy_amount);
+            rasterizer.FlushRegion(src_addr, copy_amount);
+            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
             break;
         }
         default:
@@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer,
     std::size_t page_index{src_addr >> page_bits};
     std::size_t page_offset{src_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
         const u8* page_pointer = page_table.pointers[page_index];
         if (page_pointer) {
-            const u8* src_ptr{page_pointer + page_offset};
-            std::memcpy(dest_buffer, src_ptr, copy_amount);
+            const VAddr src_addr{page_table.backing_addr[page_index] + page_offset};
+            memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
         } else {
             std::memset(dest_buffer, 0, copy_amount);
         }
@@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
     std::size_t page_index{dest_addr >> page_bits};
     std::size_t page_offset{dest_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
 
         switch (page_table.attributes[page_index]) {
         case Common::PageType::Memory: {
-            u8* dest_ptr{page_table.pointers[page_index] + page_offset};
+            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
             // Invalidate must happen on the rasterizer interface, such that memory is always
             // synchronous when it is written (even when in asynchronous GPU mode).
-            rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
-            std::memcpy(dest_ptr, src_buffer, copy_amount);
+            rasterizer.InvalidateRegion(dest_addr, copy_amount);
+            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
             break;
        }
        default:
@@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
     std::size_t page_index{dest_addr >> page_bits};
     std::size_t page_offset{dest_addr & page_mask};
 
+    auto& memory = system.Memory();
+
     while (remaining_size > 0) {
         const std::size_t copy_amount{
             std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
         u8* page_pointer = page_table.pointers[page_index];
         if (page_pointer) {
-            u8* dest_ptr{page_pointer + page_offset};
-            std::memcpy(dest_ptr, src_buffer, copy_amount);
+            const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset};
+            memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
         }
         page_index++;
         page_offset = 0;
@@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer,
336} 349}
337 350
338void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 351void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
339 std::size_t remaining_size{size}; 352 std::vector<u8> tmp_buffer(size);
340 std::size_t page_index{src_addr >> page_bits}; 353 ReadBlock(src_addr, tmp_buffer.data(), size);
341 std::size_t page_offset{src_addr & page_mask}; 354 WriteBlock(dest_addr, tmp_buffer.data(), size);
342
343 while (remaining_size > 0) {
344 const std::size_t copy_amount{
345 std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
346
347 switch (page_table.attributes[page_index]) {
348 case Common::PageType::Memory: {
349 // Flush must happen on the rasterizer interface, such that memory is always synchronous
350 // when it is copied (even when in asynchronous GPU mode).
351 const u8* src_ptr{page_table.pointers[page_index] + page_offset};
352 rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
353 WriteBlock(dest_addr, src_ptr, copy_amount);
354 break;
355 }
356 default:
357 UNREACHABLE();
358 }
359
360 page_index++;
361 page_offset = 0;
362 dest_addr += static_cast<VAddr>(copy_amount);
363 src_addr += static_cast<VAddr>(copy_amount);
364 remaining_size -= copy_amount;
365 }
366} 355}
367 356
368void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { 357void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) {
@@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const
371 WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); 360 WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size);
372} 361}
373 362
363bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
364 const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits];
365 const std::size_t page = (addr & Memory::PAGE_MASK) + size;
366 return page <= Memory::PAGE_SIZE;
367}
368
374void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, 369void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type,
375 VAddr backing_addr) { 370 VAddr backing_addr) {
376 LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, 371 LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size,
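
The memory_manager.cpp changes above replace cached host pointers with per-page guest addresses: every access now goes through page_table.backing_addr and is only resolved to a host pointer at the last moment. A minimal standalone sketch of that translation scheme (hypothetical types, a 4 KiB page size assumed, not yuzu's actual classes):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    using VAddr = std::uint64_t;
    using GPUVAddr = std::uint64_t;

    constexpr std::uint64_t page_bits = 12;                // assumed 4 KiB pages
    constexpr std::uint64_t page_mask = (1ULL << page_bits) - 1;

    // One backing CPU address per GPU page; 0 marks an unmapped page.
    std::array<VAddr, 16> backing_addr{};

    VAddr GpuToCpu(GPUVAddr addr) {
        const VAddr page_addr = backing_addr[addr >> page_bits];
        return page_addr != 0 ? page_addr + (addr & page_mask) : 0;
    }

    int main() {
        backing_addr[2] = 0x8000'0000;                     // map GPU page 2
        // Prints 0x80000040: page 2's backing address plus the in-page offset.
        std::printf("0x%llx\n", static_cast<unsigned long long>(GpuToCpu(0x2040)));
    }
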
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 073bdb491..0d9468535 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -97,6 +97,11 @@ public:
97 void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); 97 void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
98 void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); 98 void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size);
99 99
100 /**
101 * IsGranularRange checks whether a GPU region can be read with a single host pointer, i.e. it does not cross a page boundary
102 */
103 bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
104
100private: 105private:
101 using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; 106 using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>;
102 using VMAHandle = VMAMap::const_iterator; 107 using VMAHandle = VMAMap::const_iterator;
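
IsGranularRange, declared above, reports whether a GPU range stays inside one host page, so callers can service it through a single pointer instead of a page-by-page copy. A self-contained illustration of the boundary test, assuming the 4 KiB PAGE_SIZE/PAGE_MASK used elsewhere in the codebase:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    constexpr std::uint64_t PAGE_SIZE = 0x1000;  // assumed 4 KiB guest pages
    constexpr std::uint64_t PAGE_MASK = PAGE_SIZE - 1;

    // True when [addr, addr + size) does not cross a page boundary.
    bool IsGranular(std::uint64_t addr, std::size_t size) {
        return (addr & PAGE_MASK) + size <= PAGE_SIZE;
    }

    int main() {
        std::printf("%d\n", IsGranular(0x1000, 0x1000));  // 1: exactly one page
        std::printf("%d\n", IsGranular(0x1FF0, 0x20));    // 0: crosses into the next page
    }
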
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index e66054ed0..5ea2b01f2 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -98,12 +98,12 @@ public:
98 static_cast<QueryCache&>(*this), 98 static_cast<QueryCache&>(*this),
99 VideoCore::QueryType::SamplesPassed}}} {} 99 VideoCore::QueryType::SamplesPassed}}} {}
100 100
101 void InvalidateRegion(CacheAddr addr, std::size_t size) { 101 void InvalidateRegion(VAddr addr, std::size_t size) {
102 std::unique_lock lock{mutex}; 102 std::unique_lock lock{mutex};
103 FlushAndRemoveRegion(addr, size); 103 FlushAndRemoveRegion(addr, size);
104 } 104 }
105 105
106 void FlushRegion(CacheAddr addr, std::size_t size) { 106 void FlushRegion(VAddr addr, std::size_t size) {
107 std::unique_lock lock{mutex}; 107 std::unique_lock lock{mutex};
108 FlushAndRemoveRegion(addr, size); 108 FlushAndRemoveRegion(addr, size);
109 } 109 }
@@ -117,14 +117,16 @@ public:
117 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { 117 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
118 std::unique_lock lock{mutex}; 118 std::unique_lock lock{mutex};
119 auto& memory_manager = system.GPU().MemoryManager(); 119 auto& memory_manager = system.GPU().MemoryManager();
120 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 120 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
121 ASSERT(cpu_addr_opt);
122 VAddr cpu_addr = *cpu_addr_opt;
121 123
122 CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); 124 CachedQuery* query = TryGet(cpu_addr);
123 if (!query) { 125 if (!query) {
124 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); 126 ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
125 ASSERT_OR_EXECUTE(cpu_addr, return;); 127 const auto host_ptr = memory_manager.GetPointer(gpu_addr);
126 128
127 query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); 129 query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
128 } 130 }
129 131
130 query->BindCounter(Stream(type).Current(), timestamp); 132 query->BindCounter(Stream(type).Current(), timestamp);
@@ -173,11 +175,11 @@ protected:
173 175
174private: 176private:
175 /// Flushes a memory range to guest memory and removes it from the cache. 177 /// Flushes a memory range to guest memory and removes it from the cache.
176 void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { 178 void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
177 const u64 addr_begin = static_cast<u64>(addr); 179 const u64 addr_begin = static_cast<u64>(addr);
178 const u64 addr_end = addr_begin + static_cast<u64>(size); 180 const u64 addr_end = addr_begin + static_cast<u64>(size);
179 const auto in_range = [addr_begin, addr_end](CachedQuery& query) { 181 const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
180 const u64 cache_begin = query.GetCacheAddr(); 182 const u64 cache_begin = query.GetCpuAddr();
181 const u64 cache_end = cache_begin + query.SizeInBytes(); 183 const u64 cache_end = cache_begin + query.SizeInBytes();
182 return cache_begin < addr_end && addr_begin < cache_end; 184 return cache_begin < addr_end && addr_begin < cache_end;
183 }; 185 };
@@ -193,7 +195,7 @@ private:
193 if (!in_range(query)) { 195 if (!in_range(query)) {
194 continue; 196 continue;
195 } 197 }
196 rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); 198 rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
197 query.Flush(); 199 query.Flush();
198 } 200 }
199 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), 201 contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
@@ -204,22 +206,21 @@ private:
204 /// Registers the passed parameters as cached and returns a pointer to the stored cached query. 206 /// Registers the passed parameters as cached and returns a pointer to the stored cached query.
205 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { 207 CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
206 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); 208 rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
207 const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; 209 const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT;
208 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, 210 return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
209 host_ptr); 211 host_ptr);
210 } 212 }
211 213
212 /// Tries to get a cached query. Returns nullptr on failure. 214 /// Tries to get a cached query. Returns nullptr on failure.
213 CachedQuery* TryGet(CacheAddr addr) { 215 CachedQuery* TryGet(VAddr addr) {
214 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; 216 const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
215 const auto it = cached_queries.find(page); 217 const auto it = cached_queries.find(page);
216 if (it == std::end(cached_queries)) { 218 if (it == std::end(cached_queries)) {
217 return nullptr; 219 return nullptr;
218 } 220 }
219 auto& contents = it->second; 221 auto& contents = it->second;
220 const auto found = 222 const auto found = std::find_if(std::begin(contents), std::end(contents),
221 std::find_if(std::begin(contents), std::end(contents), 223 [addr](auto& query) { return query.GetCpuAddr() == addr; });
222 [addr](auto& query) { return query.GetCacheAddr() == addr; });
223 return found != std::end(contents) ? &*found : nullptr; 224 return found != std::end(contents) ? &*found : nullptr;
224 } 225 }
225 226
@@ -323,14 +324,10 @@ public:
323 timestamp = timestamp_; 324 timestamp = timestamp_;
324 } 325 }
325 326
326 VAddr CpuAddr() const noexcept { 327 VAddr GetCpuAddr() const noexcept {
327 return cpu_addr; 328 return cpu_addr;
328 } 329 }
329 330
330 CacheAddr GetCacheAddr() const noexcept {
331 return ToCacheAddr(host_ptr);
332 }
333
334 u64 SizeInBytes() const noexcept { 331 u64 SizeInBytes() const noexcept {
335 return SizeInBytes(timestamp.has_value()); 332 return SizeInBytes(timestamp.has_value());
336 } 333 }
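
The query cache now buckets entries by the CPU page of their guest address and tests region overlap directly on VAddr values. A toy version of that bookkeeping (hypothetical Entry type, PAGE_SHIFT assumed to be 12):

    #include <algorithm>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    using VAddr = std::uint64_t;
    constexpr unsigned PAGE_SHIFT = 12;  // assumed

    struct Entry {
        VAddr cpu_addr;
        std::uint64_t size;
    };

    std::unordered_map<std::uint64_t, std::vector<Entry>> cached;

    Entry* TryGet(VAddr addr) {
        const auto it = cached.find(addr >> PAGE_SHIFT);
        if (it == cached.end()) {
            return nullptr;
        }
        auto& contents = it->second;
        const auto found = std::find_if(contents.begin(), contents.end(),
                                        [addr](const Entry& e) { return e.cpu_addr == addr; });
        return found != contents.end() ? &*found : nullptr;
    }

    // Same half-open overlap test as FlushAndRemoveRegion above.
    bool Overlaps(const Entry& e, VAddr begin, VAddr end) {
        return e.cpu_addr < end && begin < e.cpu_addr + e.size;
    }

    int main() {
        cached[0x10].push_back({0x10000, 8});
        return TryGet(0x10000) != nullptr && Overlaps(cached[0x10][0], 0x10004, 0x10010) ? 0 : 1;
    }
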
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 6de1597a2..22987751e 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -18,22 +18,14 @@
18 18
19class RasterizerCacheObject { 19class RasterizerCacheObject {
20public: 20public:
21 explicit RasterizerCacheObject(const u8* host_ptr) 21 explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
22 : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
23 22
24 virtual ~RasterizerCacheObject(); 23 virtual ~RasterizerCacheObject();
25 24
26 CacheAddr GetCacheAddr() const { 25 VAddr GetCpuAddr() const {
27 return cache_addr; 26 return cpu_addr;
28 } 27 }
29 28
30 const u8* GetHostPtr() const {
31 return host_ptr;
32 }
33
34 /// Gets the address of the shader in guest memory, required for cache management
35 virtual VAddr GetCpuAddr() const = 0;
36
37 /// Gets the size of the shader in guest memory, required for cache management 29 /// Gets the size of the shader in guest memory, required for cache management
38 virtual std::size_t GetSizeInBytes() const = 0; 30 virtual std::size_t GetSizeInBytes() const = 0;
39 31
@@ -68,8 +60,7 @@ private:
68 bool is_registered{}; ///< Whether the object is currently registered with the cache 60 bool is_registered{}; ///< Whether the object is currently registered with the cache
69 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 61 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
70 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 62 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
71 const u8* host_ptr{}; ///< Pointer to the memory backing this cached region 63 VAddr cpu_addr{}; ///< CPU address of the guest memory backing this cached region
72 CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
73}; 64};
74 65
75template <class T> 66template <class T>
@@ -80,7 +71,7 @@ public:
80 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 71 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
81 72
82 /// Write any cached resources overlapping the specified region back to memory 73 /// Write any cached resources overlapping the specified region back to memory
83 void FlushRegion(CacheAddr addr, std::size_t size) { 74 void FlushRegion(VAddr addr, std::size_t size) {
84 std::lock_guard lock{mutex}; 75 std::lock_guard lock{mutex};
85 76
86 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 77 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -90,7 +81,7 @@ public:
90 } 81 }
91 82
92 /// Mark the specified region as being invalidated 83 /// Mark the specified region as being invalidated
93 void InvalidateRegion(CacheAddr addr, u64 size) { 84 void InvalidateRegion(VAddr addr, u64 size) {
94 std::lock_guard lock{mutex}; 85 std::lock_guard lock{mutex};
95 86
96 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 87 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
@@ -114,27 +105,20 @@ public:
114 105
115protected: 106protected:
116 /// Tries to get an object from the cache with the specified cache address 107 /// Tries to get an object from the cache with the specified cache address
117 T TryGet(CacheAddr addr) const { 108 T TryGet(VAddr addr) const {
118 const auto iter = map_cache.find(addr); 109 const auto iter = map_cache.find(addr);
119 if (iter != map_cache.end()) 110 if (iter != map_cache.end())
120 return iter->second; 111 return iter->second;
121 return nullptr; 112 return nullptr;
122 } 113 }
123 114
124 T TryGet(const void* addr) const {
125 const auto iter = map_cache.find(ToCacheAddr(addr));
126 if (iter != map_cache.end())
127 return iter->second;
128 return nullptr;
129 }
130
131 /// Register an object into the cache 115 /// Register an object into the cache
132 virtual void Register(const T& object) { 116 virtual void Register(const T& object) {
133 std::lock_guard lock{mutex}; 117 std::lock_guard lock{mutex};
134 118
135 object->SetIsRegistered(true); 119 object->SetIsRegistered(true);
136 interval_cache.add({GetInterval(object), ObjectSet{object}}); 120 interval_cache.add({GetInterval(object), ObjectSet{object}});
137 map_cache.insert({object->GetCacheAddr(), object}); 121 map_cache.insert({object->GetCpuAddr(), object});
138 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); 122 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
139 } 123 }
140 124
@@ -144,7 +128,7 @@ protected:
144 128
145 object->SetIsRegistered(false); 129 object->SetIsRegistered(false);
146 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); 130 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
147 const CacheAddr addr = object->GetCacheAddr(); 131 const VAddr addr = object->GetCpuAddr();
148 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 132 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
149 map_cache.erase(addr); 133 map_cache.erase(addr);
150 } 134 }
@@ -173,7 +157,7 @@ protected:
173 157
174private: 158private:
175 /// Returns a list of cached objects from the specified memory region, ordered by access time 159 /// Returns a list of cached objects from the specified memory region, ordered by access time
176 std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { 160 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
177 if (size == 0) { 161 if (size == 0) {
178 return {}; 162 return {};
179 } 163 }
@@ -197,13 +181,13 @@ private:
197 } 181 }
198 182
199 using ObjectSet = std::set<T>; 183 using ObjectSet = std::set<T>;
200 using ObjectCache = std::unordered_map<CacheAddr, T>; 184 using ObjectCache = std::unordered_map<VAddr, T>;
201 using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; 185 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
202 using ObjectInterval = typename IntervalCache::interval_type; 186 using ObjectInterval = typename IntervalCache::interval_type;
203 187
204 static auto GetInterval(const T& object) { 188 static auto GetInterval(const T& object) {
205 return ObjectInterval::right_open(object->GetCacheAddr(), 189 return ObjectInterval::right_open(object->GetCpuAddr(),
206 object->GetCacheAddr() + object->GetSizeInBytes()); 190 object->GetCpuAddr() + object->GetSizeInBytes());
207 } 191 }
208 192
209 ObjectCache map_cache; 193 ObjectCache map_cache;
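
RasterizerCache now keys both its map and its boost::icl interval map by GetCpuAddr(). The interval registered per object is right-open, [cpu_addr, cpu_addr + size), which is what lets adjacent objects coexist without overlapping. A small sketch of that convention (plain std types instead of boost::icl):

    #include <cstdint>
    #include <cstdio>

    using VAddr = std::uint64_t;

    struct Interval {
        VAddr begin;  // inclusive
        VAddr end;    // exclusive: right-open, matching ObjectInterval::right_open
    };

    bool Intersects(const Interval& a, const Interval& b) {
        return a.begin < b.end && b.begin < a.end;
    }

    int main() {
        const Interval shader_a{0x1000, 0x2000};
        const Interval shader_b{0x2000, 0x3000};  // starts exactly where a ends
        std::printf("%d\n", Intersects(shader_a, shader_b));  // 0: adjacent, not overlapping
    }
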
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1a68e3caa..8ae5b9c4e 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -53,14 +53,14 @@ public:
53 virtual void FlushAll() = 0; 53 virtual void FlushAll() = 0;
54 54
55 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 55 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
56 virtual void FlushRegion(CacheAddr addr, u64 size) = 0; 56 virtual void FlushRegion(VAddr addr, u64 size) = 0;
57 57
58 /// Notify rasterizer that any caches of the specified region should be invalidated 58 /// Notify rasterizer that any caches of the specified region should be invalidated
59 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; 59 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
60 60
61 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 61 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
62 /// and invalidated 62 /// and invalidated
63 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; 63 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
64 64
65 /// Notify the rasterizer to send all written commands to the host GPU. 65 /// Notify the rasterizer to send all written commands to the host GPU.
66 virtual void FlushCommands() = 0; 66 virtual void FlushCommands() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 0375fca17..4eb37a96c 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
21 21
22MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); 22MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
23 23
24CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) 24CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size)
25 : VideoCommon::BufferBlock{cache_addr, size} { 25 : VideoCommon::BufferBlock{cpu_addr, size} {
26 gl_buffer.Create(); 26 gl_buffer.Create();
27 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); 27 glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
28} 28}
@@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() {
47 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); 47 glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
48} 48}
49 49
50Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { 50Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
51 return std::make_shared<CachedBufferBlock>(cache_addr, size); 51 return std::make_shared<CachedBufferBlock>(cpu_addr, size);
52} 52}
53 53
54void OGLBufferCache::WriteBarrier() { 54void OGLBufferCache::WriteBarrier() {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8c7145443..d94a11252 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf
31 31
32class CachedBufferBlock : public VideoCommon::BufferBlock { 32class CachedBufferBlock : public VideoCommon::BufferBlock {
33public: 33public:
34 explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); 34 explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
35 ~CachedBufferBlock(); 35 ~CachedBufferBlock();
36 36
37 const GLuint* GetHandle() const { 37 const GLuint* GetHandle() const {
@@ -55,7 +55,7 @@ public:
55 } 55 }
56 56
57protected: 57protected:
58 Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; 58 Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
59 59
60 void WriteBarrier() override; 60 void WriteBarrier() override;
61 61
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 1a2e2a9f7..c286502ba 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -131,6 +131,31 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
131 return bindings; 131 return bindings;
132} 132}
133 133
134bool IsASTCSupported() {
135 static constexpr std::array formats = {
136 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
137 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
138 GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR,
139 GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR,
140 GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR,
141 GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR,
142 GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR,
143 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR,
144 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR,
145 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR,
146 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR,
147 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR,
148 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
149 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
150 };
151 return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) {
152 GLint supported;
153 glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
154 &supported);
155 return supported == GL_TRUE;
156 }) == formats.end();
157}
158
134} // Anonymous namespace 159} // Anonymous namespace
135 160
136Device::Device() : base_bindings{BuildBaseBindings()} { 161Device::Device() : base_bindings{BuildBaseBindings()} {
@@ -152,6 +177,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
152 has_shader_ballot = GLAD_GL_ARB_shader_ballot; 177 has_shader_ballot = GLAD_GL_ARB_shader_ballot;
153 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; 178 has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
154 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); 179 has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
180 has_astc = IsASTCSupported();
155 has_variable_aoffi = TestVariableAoffi(); 181 has_variable_aoffi = TestVariableAoffi();
156 has_component_indexing_bug = is_amd; 182 has_component_indexing_bug = is_amd;
157 has_precise_bug = TestPreciseBug(); 183 has_precise_bug = TestPreciseBug();
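
IsASTCSupported treats ASTC as usable only when every listed format passes the GL_INTERNALFORMAT_SUPPORTED query; a single unsupported format fails the whole probe. The same all-or-nothing pattern, sketched without a GL context (the driver query is stubbed out; the enum values are just the KHR ASTC constants):

    #include <algorithm>
    #include <array>
    #include <cstdio>

    using GLenum = unsigned int;

    // Stand-in for glGetInternalformativ(..., GL_INTERNALFORMAT_SUPPORTED, ...).
    bool DriverSupports(GLenum format) {
        return format != 0x93BD;  // pretend 12x12 is missing
    }

    int main() {
        constexpr std::array<GLenum, 3> formats{
            0x93B0,  // GL_COMPRESSED_RGBA_ASTC_4x4_KHR
            0x93B7,  // GL_COMPRESSED_RGBA_ASTC_8x8_KHR
            0x93BD,  // GL_COMPRESSED_RGBA_ASTC_12x12_KHR
        };
        const bool all = std::find_if_not(formats.begin(), formats.end(),
                                          DriverSupports) == formats.end();
        std::printf("ASTC usable: %s\n", all ? "yes" : "no");  // no
    }
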
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index d73b099d0..a55050cb5 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -64,6 +64,10 @@ public:
64 return has_image_load_formatted; 64 return has_image_load_formatted;
65 } 65 }
66 66
67 bool HasASTC() const {
68 return has_astc;
69 }
70
67 bool HasVariableAoffi() const { 71 bool HasVariableAoffi() const {
68 return has_variable_aoffi; 72 return has_variable_aoffi;
69 } 73 }
@@ -97,6 +101,7 @@ private:
97 bool has_shader_ballot{}; 101 bool has_shader_ballot{};
98 bool has_vertex_viewport_layer{}; 102 bool has_vertex_viewport_layer{};
99 bool has_image_load_formatted{}; 103 bool has_image_load_formatted{};
104 bool has_astc{};
100 bool has_variable_aoffi{}; 105 bool has_variable_aoffi{};
101 bool has_component_indexing_bug{}; 106 bool has_component_indexing_bug{};
102 bool has_precise_bug{}; 107 bool has_precise_bug{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 31add708f..f4598fbf7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() {
140 const auto attrib = gpu.regs.vertex_attrib_format[index]; 140 const auto attrib = gpu.regs.vertex_attrib_format[index];
141 const auto gl_index = static_cast<GLuint>(index); 141 const auto gl_index = static_cast<GLuint>(index);
142 142
143 // Ignore invalid attributes. 143 // Disable constant attributes.
144 if (!attrib.IsValid()) { 144 if (attrib.IsConstant()) {
145 glDisableVertexAttribArray(gl_index); 145 glDisableVertexAttribArray(gl_index);
146 continue; 146 continue;
147 } 147 }
@@ -345,7 +345,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
345 345
346 texture_cache.GuardRenderTargets(true); 346 texture_cache.GuardRenderTargets(true);
347 347
348 View depth_surface = texture_cache.GetDepthBufferSurface(true); 348 View depth_surface = texture_cache.GetDepthBufferSurface();
349 349
350 const auto& regs = gpu.regs; 350 const auto& regs = gpu.regs;
351 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); 351 UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
@@ -354,7 +354,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
354 FramebufferCacheKey key; 354 FramebufferCacheKey key;
355 const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); 355 const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
356 for (std::size_t index = 0; index < colors_count; ++index) { 356 for (std::size_t index = 0; index < colors_count; ++index) {
357 View color_surface{texture_cache.GetColorBufferSurface(index, true)}; 357 View color_surface{texture_cache.GetColorBufferSurface(index)};
358 if (!color_surface) { 358 if (!color_surface) {
359 continue; 359 continue;
360 } 360 }
@@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using
386 texture_cache.GuardRenderTargets(true); 386 texture_cache.GuardRenderTargets(true);
387 View color_surface; 387 View color_surface;
388 if (using_color_fb) { 388 if (using_color_fb) {
389 color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); 389 const std::size_t index = regs.clear_buffers.RT;
390 color_surface = texture_cache.GetColorBufferSurface(index);
391 texture_cache.MarkColorBufferInUse(index);
390 } 392 }
391 View depth_surface; 393 View depth_surface;
392 if (using_depth_fb || using_stencil_fb) { 394 if (using_depth_fb || using_stencil_fb) {
393 depth_surface = texture_cache.GetDepthBufferSurface(false); 395 depth_surface = texture_cache.GetDepthBufferSurface();
396 texture_cache.MarkDepthBufferInUse();
394 } 397 }
395 texture_cache.GuardRenderTargets(false); 398 texture_cache.GuardRenderTargets(false);
396 399
@@ -493,6 +496,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
493 SyncPrimitiveRestart(); 496 SyncPrimitiveRestart();
494 SyncScissorTest(); 497 SyncScissorTest();
495 SyncPointState(); 498 SyncPointState();
499 SyncLineState();
496 SyncPolygonOffset(); 500 SyncPolygonOffset();
497 SyncAlphaTest(); 501 SyncAlphaTest();
498 SyncFramebufferSRGB(); 502 SyncFramebufferSRGB();
@@ -653,9 +657,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
653 657
654void RasterizerOpenGL::FlushAll() {} 658void RasterizerOpenGL::FlushAll() {}
655 659
656void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { 660void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
657 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 661 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
658 if (!addr || !size) { 662 if (addr == 0 || size == 0) {
659 return; 663 return;
660 } 664 }
661 texture_cache.FlushRegion(addr, size); 665 texture_cache.FlushRegion(addr, size);
@@ -663,9 +667,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
663 query_cache.FlushRegion(addr, size); 667 query_cache.FlushRegion(addr, size);
664} 668}
665 669
666void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 670void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
667 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 671 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
668 if (!addr || !size) { 672 if (addr == 0 || size == 0) {
669 return; 673 return;
670 } 674 }
671 texture_cache.InvalidateRegion(addr, size); 675 texture_cache.InvalidateRegion(addr, size);
@@ -674,7 +678,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
674 query_cache.InvalidateRegion(addr, size); 678 query_cache.InvalidateRegion(addr, size);
675} 679}
676 680
677void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 681void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
678 if (Settings::values.use_accurate_gpu_emulation) { 682 if (Settings::values.use_accurate_gpu_emulation) {
679 FlushRegion(addr, size); 683 FlushRegion(addr, size);
680 } 684 }
@@ -713,8 +717,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
713 717
714 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 718 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
715 719
716 const auto surface{ 720 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
717 texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))};
718 if (!surface) { 721 if (!surface) {
719 return {}; 722 return {};
720 } 723 }
@@ -1309,6 +1312,19 @@ void RasterizerOpenGL::SyncPointState() {
1309 glDisable(GL_PROGRAM_POINT_SIZE); 1312 glDisable(GL_PROGRAM_POINT_SIZE);
1310} 1313}
1311 1314
1315void RasterizerOpenGL::SyncLineState() {
1316 auto& gpu = system.GPU().Maxwell3D();
1317 auto& flags = gpu.dirty.flags;
1318 if (!flags[Dirty::LineWidth]) {
1319 return;
1320 }
1321 flags[Dirty::LineWidth] = false;
1322
1323 const auto& regs = gpu.regs;
1324 oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
1325 glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
1326}
1327
1312void RasterizerOpenGL::SyncPolygonOffset() { 1328void RasterizerOpenGL::SyncPolygonOffset() {
1313 auto& gpu = system.GPU().Maxwell3D(); 1329 auto& gpu = system.GPU().Maxwell3D();
1314 auto& flags = gpu.dirty.flags; 1330 auto& flags = gpu.dirty.flags;
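
SyncLineState follows the rasterizer's usual dirty-flag protocol: bail out while the flag is clear, clear it, then apply the register state. Sketched generically below (hypothetical flag and register types, printf standing in for the GL call):

    #include <bitset>
    #include <cstdio>

    enum DirtyFlag { LineWidth, NumFlags };

    struct Regs {
        bool line_smooth_enable = true;
        float line_width_smooth = 2.0f;
        float line_width_aliased = 1.0f;
    };

    std::bitset<NumFlags> dirty;
    Regs regs;

    void SyncLineState() {
        if (!dirty[LineWidth]) {
            return;  // nothing changed since the last draw
        }
        dirty[LineWidth] = false;
        // Pick the width that matches the smoothing mode, as the real code does.
        const float width = regs.line_smooth_enable ? regs.line_width_smooth
                                                    : regs.line_width_aliased;
        std::printf("glLineWidth(%.1f)\n", width);
    }

    int main() {
        dirty[LineWidth] = true;  // a register write would set this via the tracker
        SyncLineState();          // applies once
        SyncLineState();          // no-op: flag already cleared
    }
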
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2d3be2437..435da4425 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -65,9 +65,9 @@ public:
65 void ResetCounter(VideoCore::QueryType type) override; 65 void ResetCounter(VideoCore::QueryType type) override;
66 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 66 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
67 void FlushAll() override; 67 void FlushAll() override;
68 void FlushRegion(CacheAddr addr, u64 size) override; 68 void FlushRegion(VAddr addr, u64 size) override;
69 void InvalidateRegion(CacheAddr addr, u64 size) override; 69 void InvalidateRegion(VAddr addr, u64 size) override;
70 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 70 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
71 void FlushCommands() override; 71 void FlushCommands() override;
72 void TickFrame() override; 72 void TickFrame() override;
73 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 73 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
@@ -171,6 +171,9 @@ private:
171 /// Syncs the point state to match the guest state 171 /// Syncs the point state to match the guest state
172 void SyncPointState(); 172 void SyncPointState();
173 173
174 /// Syncs the line state to match the guest state
175 void SyncLineState();
176
174 /// Syncs the rasterizer enable state to match the guest state 177 /// Syncs the rasterizer enable state to match the guest state
175 void SyncRasterizeEnable(); 178 void SyncRasterizeEnable();
176 179
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 046ee55a5..12c6dcfde 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -34,6 +34,8 @@
34namespace OpenGL { 34namespace OpenGL {
35 35
36using Tegra::Engines::ShaderType; 36using Tegra::Engines::ShaderType;
37using VideoCommon::Shader::CompileDepth;
38using VideoCommon::Shader::CompilerSettings;
37using VideoCommon::Shader::ProgramCode; 39using VideoCommon::Shader::ProgramCode;
38using VideoCommon::Shader::Registry; 40using VideoCommon::Shader::Registry;
39using VideoCommon::Shader::ShaderIR; 41using VideoCommon::Shader::ShaderIR;
@@ -43,7 +45,7 @@ namespace {
43constexpr u32 STAGE_MAIN_OFFSET = 10; 45constexpr u32 STAGE_MAIN_OFFSET = 10;
44constexpr u32 KERNEL_MAIN_OFFSET = 0; 46constexpr u32 KERNEL_MAIN_OFFSET = 0;
45 47
46constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; 48constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile};
47 49
48/// Gets the address for the specified shader stage program 50/// Gets the address for the specified shader stage program
49GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { 51GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
@@ -214,11 +216,11 @@ std::unordered_set<GLenum> GetSupportedFormats() {
214 216
215} // Anonymous namespace 217} // Anonymous namespace
216 218
217CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, 219CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
218 std::shared_ptr<VideoCommon::Shader::Registry> registry, 220 std::shared_ptr<VideoCommon::Shader::Registry> registry,
219 ShaderEntries entries, std::shared_ptr<OGLProgram> program) 221 ShaderEntries entries, std::shared_ptr<OGLProgram> program)
220 : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, 222 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
221 cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} 223 size_in_bytes{size_in_bytes}, program{std::move(program)} {}
222 224
223CachedShader::~CachedShader() = default; 225CachedShader::~CachedShader() = default;
224 226
@@ -254,9 +256,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
254 entry.bindless_samplers = registry->GetBindlessSamplers(); 256 entry.bindless_samplers = registry->GetBindlessSamplers();
255 params.disk_cache.SaveEntry(std::move(entry)); 257 params.disk_cache.SaveEntry(std::move(entry));
256 258
257 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, 259 return std::shared_ptr<CachedShader>(new CachedShader(
258 size_in_bytes, std::move(registry), 260 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
259 MakeEntries(ir), std::move(program)));
260} 261}
261 262
262Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 263Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -279,17 +280,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
279 entry.bindless_samplers = registry->GetBindlessSamplers(); 280 entry.bindless_samplers = registry->GetBindlessSamplers();
280 params.disk_cache.SaveEntry(std::move(entry)); 281 params.disk_cache.SaveEntry(std::move(entry));
281 282
282 return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, 283 return std::shared_ptr<CachedShader>(new CachedShader(
283 size_in_bytes, std::move(registry), 284 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program)));
284 MakeEntries(ir), std::move(program)));
285} 285}
286 286
287Shader CachedShader::CreateFromCache(const ShaderParameters& params, 287Shader CachedShader::CreateFromCache(const ShaderParameters& params,
288 const PrecompiledShader& precompiled_shader, 288 const PrecompiledShader& precompiled_shader,
289 std::size_t size_in_bytes) { 289 std::size_t size_in_bytes) {
290 return std::shared_ptr<CachedShader>(new CachedShader( 290 return std::shared_ptr<CachedShader>(
291 params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, 291 new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
292 precompiled_shader.entries, precompiled_shader.program)); 292 precompiled_shader.entries, precompiled_shader.program));
293} 293}
294 294
295ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 295ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -449,12 +449,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
449 const GPUVAddr address{GetShaderAddress(system, program)}; 449 const GPUVAddr address{GetShaderAddress(system, program)};
450 450
451 // Look up shader in the cache based on address 451 // Look up shader in the cache based on address
452 const auto host_ptr{memory_manager.GetPointer(address)}; 452 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
453 Shader shader{TryGet(host_ptr)}; 453 Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
454 if (shader) { 454 if (shader) {
455 return last_shaders[static_cast<std::size_t>(program)] = shader; 455 return last_shaders[static_cast<std::size_t>(program)] = shader;
456 } 456 }
457 457
458 const auto host_ptr{memory_manager.GetPointer(address)};
459
458 // No shader found - create a new one 460 // No shader found - create a new one
459 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; 461 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
460 ProgramCode code_b; 462 ProgramCode code_b;
@@ -465,9 +467,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
465 467
466 const auto unique_identifier = GetUniqueIdentifier( 468 const auto unique_identifier = GetUniqueIdentifier(
467 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); 469 GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
468 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; 470
469 const ShaderParameters params{system, disk_cache, device, 471 const ShaderParameters params{system, disk_cache, device,
470 cpu_addr, host_ptr, unique_identifier}; 472 *cpu_addr, host_ptr, unique_identifier};
471 473
472 const auto found = runtime_cache.find(unique_identifier); 474 const auto found = runtime_cache.find(unique_identifier);
473 if (found == runtime_cache.end()) { 475 if (found == runtime_cache.end()) {
@@ -484,18 +486,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
484 486
485Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { 487Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
486 auto& memory_manager{system.GPU().MemoryManager()}; 488 auto& memory_manager{system.GPU().MemoryManager()};
487 const auto host_ptr{memory_manager.GetPointer(code_addr)}; 489 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
488 auto kernel = TryGet(host_ptr); 490
491 auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
489 if (kernel) { 492 if (kernel) {
490 return kernel; 493 return kernel;
491 } 494 }
492 495
496 const auto host_ptr{memory_manager.GetPointer(code_addr)};
493 // No kernel found, create a new one 497 // No kernel found, create a new one
494 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 498 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
495 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; 499 const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
496 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 500
497 const ShaderParameters params{system, disk_cache, device, 501 const ShaderParameters params{system, disk_cache, device,
498 cpu_addr, host_ptr, unique_identifier}; 502 *cpu_addr, host_ptr, unique_identifier};
499 503
500 const auto found = runtime_cache.find(unique_identifier); 504 const auto found = runtime_cache.find(unique_identifier);
501 if (found == runtime_cache.end()) { 505 if (found == runtime_cache.end()) {
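
Shader lookup now goes GPU address, to optional CPU address, to cache probe, and GetPointer is only consulted on a miss. A condensed model of that flow (hypothetical map-based cache and translation table, not the real classes):

    #include <cstdint>
    #include <map>
    #include <memory>
    #include <optional>

    using VAddr = std::uint64_t;
    using GPUVAddr = std::uint64_t;

    struct Shader {};
    std::map<VAddr, std::shared_ptr<Shader>> cache;
    std::map<GPUVAddr, VAddr> mappings;  // stand-in for the GPU page table

    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) {
        const auto it = mappings.find(addr);
        if (it == mappings.end()) {
            return std::nullopt;
        }
        return it->second;
    }

    std::shared_ptr<Shader> GetShader(GPUVAddr address) {
        const auto cpu_addr = GpuToCpuAddress(address);
        // Probe the cache only when translation succeeded, as GetStageProgram does.
        if (auto shader = cpu_addr ? cache[*cpu_addr] : nullptr) {
            return shader;
        }
        // Miss: this is where the real code fetches host_ptr and decompiles.
        auto shader = std::make_shared<Shader>();
        if (cpu_addr) {
            cache[*cpu_addr] = shader;
        }
        return shader;
    }

    int main() {
        mappings[0xCAFE] = 0x1000;
        return GetShader(0xCAFE) == GetShader(0xCAFE) ? 0 : 1;  // second call hits the cache
    }
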
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 4935019fc..c836df5bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -65,11 +65,6 @@ public:
65 /// Gets the GL program handle for the shader 65 /// Gets the GL program handle for the shader
66 GLuint GetHandle() const; 66 GLuint GetHandle() const;
67 67
68 /// Returns the guest CPU address of the shader
69 VAddr GetCpuAddr() const override {
70 return cpu_addr;
71 }
72
73 /// Returns the size in bytes of the shader 68 /// Returns the size in bytes of the shader
74 std::size_t GetSizeInBytes() const override { 69 std::size_t GetSizeInBytes() const override {
75 return size_in_bytes; 70 return size_in_bytes;
@@ -90,13 +85,12 @@ public:
90 std::size_t size_in_bytes); 85 std::size_t size_in_bytes);
91 86
92private: 87private:
93 explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, 88 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
94 std::shared_ptr<VideoCommon::Shader::Registry> registry, 89 std::shared_ptr<VideoCommon::Shader::Registry> registry,
95 ShaderEntries entries, std::shared_ptr<OGLProgram> program); 90 ShaderEntries entries, std::shared_ptr<OGLProgram> program);
96 91
97 std::shared_ptr<VideoCommon::Shader::Registry> registry; 92 std::shared_ptr<VideoCommon::Shader::Registry> registry;
98 ShaderEntries entries; 93 ShaderEntries entries;
99 VAddr cpu_addr = 0;
100 std::size_t size_in_bytes = 0; 94 std::size_t size_in_bytes = 0;
101 std::shared_ptr<OGLProgram> program; 95 std::shared_ptr<OGLProgram> program;
102}; 96};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a25280a47..b1804e9ea 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -31,11 +31,11 @@ namespace {
31 31
32using Tegra::Engines::ShaderType; 32using Tegra::Engines::ShaderType;
33using Tegra::Shader::Attribute; 33using Tegra::Shader::Attribute;
34using Tegra::Shader::AttributeUse;
35using Tegra::Shader::Header; 34using Tegra::Shader::Header;
36using Tegra::Shader::IpaInterpMode; 35using Tegra::Shader::IpaInterpMode;
37using Tegra::Shader::IpaMode; 36using Tegra::Shader::IpaMode;
38using Tegra::Shader::IpaSampleMode; 37using Tegra::Shader::IpaSampleMode;
38using Tegra::Shader::PixelImap;
39using Tegra::Shader::Register; 39using Tegra::Shader::Register;
40using VideoCommon::Shader::BuildTransformFeedback; 40using VideoCommon::Shader::BuildTransformFeedback;
41using VideoCommon::Shader::Registry; 41using VideoCommon::Shader::Registry;
@@ -702,20 +702,19 @@ private:
702 code.AddNewLine(); 702 code.AddNewLine();
703 } 703 }
704 704
705 std::string GetInputFlags(AttributeUse attribute) { 705 const char* GetInputFlags(PixelImap attribute) {
706 switch (attribute) { 706 switch (attribute) {
707 case AttributeUse::Perspective: 707 case PixelImap::Perspective:
708 // Default, Smooth 708 return "smooth";
709 return {}; 709 case PixelImap::Constant:
710 case AttributeUse::Constant: 710 return "flat";
711 return "flat "; 711 case PixelImap::ScreenLinear:
712 case AttributeUse::ScreenLinear: 712 return "noperspective";
713 return "noperspective "; 713 case PixelImap::Unused:
714 default: 714 break;
715 case AttributeUse::Unused:
716 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
717 return {};
718 } 715 }
716 UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute));
717 return {};
719 } 718 }
720 719
721 void DeclareInputAttributes() { 720 void DeclareInputAttributes() {
@@ -749,8 +748,8 @@ private:
749 748
750 std::string suffix; 749 std::string suffix;
751 if (stage == ShaderType::Fragment) { 750 if (stage == ShaderType::Fragment) {
752 const auto input_mode{header.ps.GetAttributeUse(location)}; 751 const auto input_mode{header.ps.GetPixelImap(location)};
753 if (skip_unused && input_mode == AttributeUse::Unused) { 752 if (input_mode == PixelImap::Unused) {
754 return; 753 return;
755 } 754 }
756 suffix = GetInputFlags(input_mode); 755 suffix = GetInputFlags(input_mode);
@@ -927,7 +926,7 @@ private:
927 const u32 address{generic_base + index * generic_stride + element * element_stride}; 926 const u32 address{generic_base + index * generic_stride + element * element_stride};
928 927
929 const bool declared = stage != ShaderType::Fragment || 928 const bool declared = stage != ShaderType::Fragment ||
930 header.ps.GetAttributeUse(index) != AttributeUse::Unused; 929 header.ps.GetPixelImap(index) != PixelImap::Unused;
931 const std::string value = 930 const std::string value =
932 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; 931 declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
933 code.AddLine("case 0x{:X}U: return {};", address, value); 932 code.AddLine("case 0x{:X}U: return {};", address, value);
@@ -1142,8 +1141,7 @@ private:
1142 GetSwizzle(element)), 1141 GetSwizzle(element)),
1143 Type::Float}; 1142 Type::Float};
1144 case ShaderType::Fragment: 1143 case ShaderType::Fragment:
1145 return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), 1144 return {"gl_FragCoord"s + GetSwizzle(element), Type::Float};
1146 Type::Float};
1147 default: 1145 default:
1148 UNREACHABLE(); 1146 UNREACHABLE();
1149 } 1147 }
@@ -1821,15 +1819,17 @@ private:
1821 } 1819 }
1822 1820
1823 Expression HMergeH0(Operation operation) { 1821 Expression HMergeH0(Operation operation) {
1824 std::string dest = VisitOperand(operation, 0).AsUint(); 1822 const std::string dest = VisitOperand(operation, 0).AsUint();
1825 std::string src = VisitOperand(operation, 1).AsUint(); 1823 const std::string src = VisitOperand(operation, 1).AsUint();
1826 return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint}; 1824 return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest),
1825 Type::HalfFloat};
1827 } 1826 }
1828 1827
1829 Expression HMergeH1(Operation operation) { 1828 Expression HMergeH1(Operation operation) {
1830 std::string dest = VisitOperand(operation, 0).AsUint(); 1829 const std::string dest = VisitOperand(operation, 0).AsUint();
1831 std::string src = VisitOperand(operation, 1).AsUint(); 1830 const std::string src = VisitOperand(operation, 1).AsUint();
1832 return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint}; 1831 return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src),
1832 Type::HalfFloat};
1833 } 1833 }
1834 1834
1835 Expression HPack2(Operation operation) { 1835 Expression HPack2(Operation operation) {
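
HMergeH0/H1 used to splice two packed half-float registers with integer masks; the decompiler now emits a typed half2 result instead, but the pairing is unchanged: H0 takes the low half from src and the high half from dest, H1 the mirror image. A worked bit-level example of that pairing (plain uint32 arithmetic, not GLSL):

    #include <cstdint>
    #include <cstdio>

    // Old-style integer splice: low 16 bits from src, high 16 bits from dest.
    std::uint32_t HMergeH0(std::uint32_t dest, std::uint32_t src) {
        return (src & 0x0000FFFFu) | (dest & 0xFFFF0000u);
    }

    // H1 is the mirror image: low half from dest, high half from src.
    std::uint32_t HMergeH1(std::uint32_t dest, std::uint32_t src) {
        return (dest & 0x0000FFFFu) | (src & 0xFFFF0000u);
    }

    int main() {
        const std::uint32_t dest = 0xAAAA'BBBBu;
        const std::uint32_t src = 0xCCCC'DDDDu;
        std::printf("H0 = 0x%08X\n", HMergeH0(dest, src));  // 0xAAAADDDD
        std::printf("H1 = 0x%08X\n", HMergeH1(dest, src));  // 0xCCCCBBBB
    }
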
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 255ac3147..d24fad3de 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -185,6 +185,12 @@ void SetupDirtyPointSize(Tables& tables) {
185 tables[0][OFF(point_sprite_enable)] = PointSize; 185 tables[0][OFF(point_sprite_enable)] = PointSize;
186} 186}
187 187
188void SetupDirtyLineWidth(Tables& tables) {
189 tables[0][OFF(line_width_smooth)] = LineWidth;
190 tables[0][OFF(line_width_aliased)] = LineWidth;
191 tables[0][OFF(line_smooth_enable)] = LineWidth;
192}
193
188void SetupDirtyClipControl(Tables& tables) { 194void SetupDirtyClipControl(Tables& tables) {
189 auto& table = tables[0]; 195 auto& table = tables[0];
190 table[OFF(screen_y_control)] = ClipControl; 196 table[OFF(screen_y_control)] = ClipControl;
@@ -233,6 +239,7 @@ void StateTracker::Initialize() {
233 SetupDirtyLogicOp(tables); 239 SetupDirtyLogicOp(tables);
234 SetupDirtyFragmentClampColor(tables); 240 SetupDirtyFragmentClampColor(tables);
235 SetupDirtyPointSize(tables); 241 SetupDirtyPointSize(tables);
242 SetupDirtyLineWidth(tables);
236 SetupDirtyClipControl(tables); 243 SetupDirtyClipControl(tables);
237 SetupDirtyDepthClampEnabled(tables); 244 SetupDirtyDepthClampEnabled(tables);
238 SetupDirtyMisc(tables); 245 SetupDirtyMisc(tables);
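
SetupDirtyLineWidth points three Maxwell register offsets at one dirty flag, so writing any of them marks LineWidth for the next SyncLineState. The table idea in miniature (offsets here are made up; the real table is indexed by 3D method offsets):

    #include <array>
    #include <bitset>
    #include <cstddef>
    #include <cstdio>

    enum : unsigned char { LineWidth = 1, NumFlags = 8 };

    // One flag id per register offset; 0 means "no flag tracks this register".
    std::array<unsigned char, 64> table{};
    std::bitset<NumFlags> dirty;

    void OnRegisterWrite(std::size_t offset) {
        if (const auto flag = table[offset]) {
            dirty[flag] = true;
        }
    }

    int main() {
        // Mirror of SetupDirtyLineWidth: three registers, one flag.
        table[10] = LineWidth;  // line_width_smooth (hypothetical offset)
        table[11] = LineWidth;  // line_width_aliased
        table[12] = LineWidth;  // line_smooth_enable
        OnRegisterWrite(11);
        std::printf("LineWidth dirty: %d\n", static_cast<int>(dirty[LineWidth]));  // 1
    }
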
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index b882d75c3..0f823288e 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -78,6 +78,7 @@ enum : u8 {
78 LogicOp, 78 LogicOp,
79 FragmentClampColor, 79 FragmentClampColor,
80 PointSize, 80 PointSize,
81 LineWidth,
81 ClipControl, 82 ClipControl,
82 DepthClampEnabled, 83 DepthClampEnabled,
83 84
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index f424e3000..2729d1265 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -24,7 +24,6 @@ using Tegra::Texture::SwizzleSource;
24using VideoCore::MortonSwizzleMode; 24using VideoCore::MortonSwizzleMode;
25 25
26using VideoCore::Surface::PixelFormat; 26using VideoCore::Surface::PixelFormat;
27using VideoCore::Surface::SurfaceCompression;
28using VideoCore::Surface::SurfaceTarget; 27using VideoCore::Surface::SurfaceTarget;
29using VideoCore::Surface::SurfaceType; 28using VideoCore::Surface::SurfaceType;
30 29
@@ -37,102 +36,100 @@ namespace {
37 36
38struct FormatTuple { 37struct FormatTuple {
39 GLint internal_format; 38 GLint internal_format;
40 GLenum format; 39 GLenum format = GL_NONE;
41 GLenum type; 40 GLenum type = GL_NONE;
42 bool compressed;
43}; 41};
44 42
45constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ 43constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
46 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U 44 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U
47 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S 45 {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S
48 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI 46 {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI
49 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U 47 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U
50 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U 48 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U
51 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U 49 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U
52 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U 50 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U
53 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI 51 {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI
54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F 52 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F
55 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U 53 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U
56 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S 54 {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S
57 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI 55 {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI
58 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F 56 {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F
59 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI 57 {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI
60 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 58 {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1
61 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 59 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23
62 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 60 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45
63 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 61 {GL_COMPRESSED_RED_RGTC1}, // DXN1
64 {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM 62 {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM
65 {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM 63 {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM
66 {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U 64 {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U
67 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16 65 {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16
68 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16 66 {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16
69 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4 67 {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4
70 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 68 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
71 {GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F 69 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F
72 {GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F 70 {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F
73 {GL_R32F, GL_RED, GL_FLOAT, false}, // R32F 71 {GL_R32F, GL_RED, GL_FLOAT}, // R32F
74 {GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F 72 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F
75 {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U 73 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U
76 {GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S 74 {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S
77 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI 75 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI
78 {GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I 76 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I
79 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16 77 {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16
80 {GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F 78 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F
81 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI 79 {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI
82 {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I 80 {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I
83 {GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S 81 {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S
84 {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F 82 {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F
85 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB 83 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB
86 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U 84 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U
87 {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S 85 {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S
88 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI 86 {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI
89 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F 87 {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F
90 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI 88 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI
91 {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I 89 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I
92 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 90 {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8
93 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 91 {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5
94 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 92 {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4
95 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 93 {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8
96 // Compressed sRGB formats 94 // Compressed sRGB formats
97 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB 95 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB
98 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB 96 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB
99 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB 97 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB
100 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB 98 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB
101 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U 99 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U
102 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB 100 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
103 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB 101 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
104 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB 102 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
105 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB 103 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
106 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5 104 {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5
107 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB 105 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
108 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8 106 {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8
109 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB 107 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
110 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6 108 {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6
111 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB 109 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
112 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10 110 {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10
113 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB 111 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
114 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12 112 {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12
115 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB 113 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
116 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6 114 {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6
117 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB 115 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
118 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5 116 {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5
119 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB 117 {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
120 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F 118 {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F
121 119
122 // Depth formats 120 // Depth formats
123 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F 121 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F
124 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16 122 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16
125 123
126 // DepthStencil formats 124 // DepthStencil formats
127 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8 125 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8
128 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24 126 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24
129 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8 127 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8
130}}; 128}};
131 129
132const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { 130const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
133 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); 131 ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
134 const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; 132 return tex_format_tuples[static_cast<std::size_t>(pixel_format)];
135 return format;
136} 133}
137 134
138GLenum GetTextureTarget(const SurfaceTarget& target) { 135GLenum GetTextureTarget(const SurfaceTarget& target) {
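The FormatTuple rework above is the heart of this hunk: format and type now default to GL_NONE, so compressed entries brace-initialize only the internal format, and the separate compressed flag disappears (the compressed upload path passes only the internal format, never a client-side format/type pair). A minimal sketch of the pattern; FormatTuple and the GL enums are the only names taken from the diff, the rest is illustrative:

    #include <glad/glad.h> // assumption: any GL loader providing these enums

    struct FormatTuple {
        GLint internal_format;
        GLenum format = GL_NONE; // compressed formats have no client-side format
        GLenum type = GL_NONE;   // ...or type, so the defaults stand in for them
    };

    // Uncompressed entries fill every field:
    constexpr FormatTuple abgr8{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV};
    // Compressed entries list only the internal format and keep the defaults:
    constexpr FormatTuple dxt1{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT};

Note also that the ASTC rows now map to the native GL_COMPRESSED_*_ASTC_* formats instead of plain (pre-decoded) RGBA8, which is what the is_astc_supported plumbing further down exists to gate.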
@@ -242,13 +239,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
242 239
243} // Anonymous namespace 240} // Anonymous namespace
244 241
245CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) 242CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
246 : VideoCommon::SurfaceBase<View>(gpu_addr, params) { 243 bool is_astc_supported)
247 const auto& tuple{GetFormatTuple(params.pixel_format)}; 244 : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) {
248 internal_format = tuple.internal_format; 245 if (is_converted) {
249 format = tuple.format; 246 internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8;
250 type = tuple.type; 247 format = GL_RGBA;
251 is_compressed = tuple.compressed; 248 type = GL_UNSIGNED_BYTE;
249 } else {
250 const auto& tuple{GetFormatTuple(params.pixel_format)};
251 internal_format = tuple.internal_format;
252 format = tuple.format;
253 type = tuple.type;
254 is_compressed = params.IsCompressed();
255 }
252 target = GetTextureTarget(params.target); 256 target = GetTextureTarget(params.target);
253 texture = CreateTexture(params, target, internal_format, texture_buffer); 257 texture = CreateTexture(params, target, internal_format, texture_buffer);
254 DecorateSurfaceName(); 258 DecorateSurfaceName();
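The constructor now receives is_astc_supported and branches on is_converted, which the common SurfaceBase presumably derives from it when the guest format has to be decoded on the CPU (ASTC on drivers without the extension). Converted surfaces always reach OpenGL as tightly packed RGBA8 data, so they pick GL_RGBA8 or GL_SRGB8_ALPHA8 and are never treated as compressed. A hedged sketch of that selection; HostFormat and PickHostFormat are illustrative stand-ins, not names from the repo:

    struct HostFormat {
        GLint internal_format;
        GLenum format;
        GLenum type;
        bool compressed;
    };

    HostFormat PickHostFormat(bool is_converted, bool srgb, const FormatTuple& tuple,
                              bool tuple_is_compressed) {
        if (is_converted) {
            // CPU-decoded pixels are plain RGBA8, regardless of the guest format
            return {srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false};
        }
        return {tuple.internal_format, tuple.format, tuple.type, tuple_is_compressed};
    }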
@@ -264,7 +268,7 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
264 268
265 if (params.IsBuffer()) { 269 if (params.IsBuffer()) {
266 glGetNamedBufferSubData(texture_buffer.handle, 0, 270 glGetNamedBufferSubData(texture_buffer.handle, 0,
267 static_cast<GLsizeiptr>(params.GetHostSizeInBytes()), 271 static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)),
268 staging_buffer.data()); 272 staging_buffer.data());
269 return; 273 return;
270 } 274 }
@@ -272,9 +276,10 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
272 SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); 276 SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
273 277
274 for (u32 level = 0; level < params.emulated_levels; ++level) { 278 for (u32 level = 0; level < params.emulated_levels; ++level) {
275 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); 279 glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
276 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 280 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
277 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); 281 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
282
278 u8* const mip_data = staging_buffer.data() + mip_offset; 283 u8* const mip_data = staging_buffer.data() + mip_offset;
279 const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); 284 const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
280 if (is_compressed) { 285 if (is_compressed) {
@@ -294,14 +299,10 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) {
294} 299}
295 300
296void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { 301void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
297 glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); 302 glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted)));
298 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); 303 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
299 304
300 auto compression_type = params.GetCompressionType(); 305 const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted);
301
302 const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
303 ? params.GetConvertedMipmapOffset(level)
304 : params.GetHostMipmapLevelOffset(level);
305 const u8* buffer{staging_buffer.data() + mip_offset}; 306 const u8* buffer{staging_buffer.data() + mip_offset};
306 if (is_compressed) { 307 if (is_compressed) {
307 const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; 308 const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
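The is_converted flag also threads through GetRowAlignment and GetHostMipmapLevelOffset, replacing the SurfaceCompression::Converted special case removed just above: a converted mip occupies its decoded RGBA8 footprint in the staging buffer, not the compressed one, so offsets derived from the compressed layout would land in the wrong place. A self-contained arithmetic check of the difference, with illustrative helpers:

    #include <cstddef>

    // ASTC 4x4 stores one 16-byte block per 4x4 texel tile
    constexpr std::size_t CompressedSize(std::size_t w, std::size_t h) {
        return (w / 4) * (h / 4) * 16;
    }
    // The decoded copy handed to glTexSubImage2D is 4 bytes per texel (RGBA8)
    constexpr std::size_t DecodedSize(std::size_t w, std::size_t h) {
        return w * h * 4;
    }
    static_assert(CompressedSize(64, 64) == 4096);
    static_assert(DecodedSize(64, 64) == 16384); // 4x the ASTC payload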
@@ -410,14 +411,13 @@ CachedSurfaceView::~CachedSurfaceView() = default;
410void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { 411void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
411 ASSERT(params.num_levels == 1); 412 ASSERT(params.num_levels == 1);
412 413
413 const GLuint texture = surface.GetTexture();
414 if (params.num_layers > 1) { 414 if (params.num_layers > 1) {
415 // Layered framebuffer attachments 415 // Layered framebuffer attachments
416 UNIMPLEMENTED_IF(params.base_layer != 0); 416 UNIMPLEMENTED_IF(params.base_layer != 0);
417 417
418 switch (params.target) { 418 switch (params.target) {
419 case SurfaceTarget::Texture2DArray: 419 case SurfaceTarget::Texture2DArray:
420 glFramebufferTexture(target, attachment, texture, params.base_level); 420 glFramebufferTexture(target, attachment, GetTexture(), 0);
421 break; 421 break;
422 default: 422 default:
423 UNIMPLEMENTED(); 423 UNIMPLEMENTED();
@@ -426,6 +426,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
426 } 426 }
427 427
428 const GLenum view_target = surface.GetTarget(); 428 const GLenum view_target = surface.GetTarget();
429 const GLuint texture = surface.GetTexture();
429 switch (surface.GetSurfaceParams().target) { 430 switch (surface.GetSurfaceParams().target) {
430 case SurfaceTarget::Texture1D: 431 case SurfaceTarget::Texture1D:
431 glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); 432 glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
@@ -482,7 +483,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
482TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, 483TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
483 VideoCore::RasterizerInterface& rasterizer, 484 VideoCore::RasterizerInterface& rasterizer,
484 const Device& device, StateTracker& state_tracker) 485 const Device& device, StateTracker& state_tracker)
485 : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} { 486 : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
486 src_framebuffer.Create(); 487 src_framebuffer.Create();
487 dst_framebuffer.Create(); 488 dst_framebuffer.Create();
488} 489}
@@ -490,7 +491,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
490TextureCacheOpenGL::~TextureCacheOpenGL() = default; 491TextureCacheOpenGL::~TextureCacheOpenGL() = default;
491 492
492Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { 493Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
493 return std::make_shared<CachedSurface>(gpu_addr, params); 494 return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported);
494} 495}
495 496
496void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, 497void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
@@ -596,7 +597,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
596 597
597 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); 598 glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
598 599
599 if (source_format.compressed) { 600 if (src_surface->IsCompressed()) {
600 glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), 601 glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
601 nullptr); 602 nullptr);
602 } else { 603 } else {
@@ -610,7 +611,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
610 const GLsizei width = static_cast<GLsizei>(dst_params.width); 611 const GLsizei width = static_cast<GLsizei>(dst_params.width);
611 const GLsizei height = static_cast<GLsizei>(dst_params.height); 612 const GLsizei height = static_cast<GLsizei>(dst_params.height);
612 const GLsizei depth = static_cast<GLsizei>(dst_params.depth); 613 const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
613 if (dest_format.compressed) { 614 if (dst_surface->IsCompressed()) {
614 LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); 615 LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
615 UNREACHABLE(); 616 UNREACHABLE();
616 } else { 617 } else {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 6658c6ffd..02d9981a1 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -37,7 +37,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
37 friend CachedSurfaceView; 37 friend CachedSurfaceView;
38 38
39public: 39public:
40 explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); 40 explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported);
41 ~CachedSurface(); 41 ~CachedSurface();
42 42
43 void UploadTexture(const std::vector<u8>& staging_buffer) override; 43 void UploadTexture(const std::vector<u8>& staging_buffer) override;
@@ -51,6 +51,10 @@ public:
51 return texture.handle; 51 return texture.handle;
52 } 52 }
53 53
54 bool IsCompressed() const {
55 return is_compressed;
56 }
57
54protected: 58protected:
55 void DecorateSurfaceName() override; 59 void DecorateSurfaceName() override;
56 60
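With FormatTuple::compressed gone, the compressed bit lives on the surface itself: it is computed once in the constructor from params.IsCompressed() and exposed through the new IsCompressed() getter. Call sites such as BufferCopy above then ask the surface rather than re-deriving the property from a format table. Roughly, as a sketch (the surrounding declarations are assumed from this header):

    // Illustrative: the getter lets call sites branch without format-table lookups
    void Download(CachedSurface& surface, GLsizei size) {
        if (surface.IsCompressed()) {
            glGetCompressedTextureImage(surface.GetTexture(), 0, size, nullptr);
        } else {
            // uncompressed surfaces go through glGetTextureImage instead
        }
    }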
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f1a28cc21..b2a179746 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -315,8 +315,8 @@ public:
315 315
316RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, 316RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
317 Core::Frontend::GraphicsContext& context) 317 Core::Frontend::GraphicsContext& context)
318 : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, 318 : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
319 frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {} 319 has_debug_tool{HasDebugTool()} {}
320 320
321RendererOpenGL::~RendererOpenGL() = default; 321RendererOpenGL::~RendererOpenGL() = default;
322 322
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h
deleted file mode 100644
index 323bf6b39..000000000
--- a/src/video_core/renderer_vulkan/declarations.h
+++ /dev/null
@@ -1,58 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7namespace vk {
8class DispatchLoaderDynamic;
9}
10
11namespace Vulkan {
12constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr;
13}
14
15#define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld)
16#define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0
17#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
18#include <vulkan/vulkan.hpp>
19
20namespace Vulkan {
21
22// vulkan.hpp unique handlers use DispatchLoaderStatic
23template <typename T>
24using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>;
25
26using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>;
27using UniqueBuffer = UniqueHandle<vk::Buffer>;
28using UniqueBufferView = UniqueHandle<vk::BufferView>;
29using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>;
30using UniqueCommandPool = UniqueHandle<vk::CommandPool>;
31using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>;
32using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>;
33using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>;
34using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>;
35using UniqueDevice = UniqueHandle<vk::Device>;
36using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>;
37using UniqueEvent = UniqueHandle<vk::Event>;
38using UniqueFence = UniqueHandle<vk::Fence>;
39using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>;
40using UniqueImage = UniqueHandle<vk::Image>;
41using UniqueImageView = UniqueHandle<vk::ImageView>;
42using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>;
43using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>;
44using UniquePipeline = UniqueHandle<vk::Pipeline>;
45using UniquePipelineCache = UniqueHandle<vk::PipelineCache>;
46using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>;
47using UniqueQueryPool = UniqueHandle<vk::QueryPool>;
48using UniqueRenderPass = UniqueHandle<vk::RenderPass>;
49using UniqueSampler = UniqueHandle<vk::Sampler>;
50using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>;
51using UniqueSemaphore = UniqueHandle<vk::Semaphore>;
52using UniqueShaderModule = UniqueHandle<vk::ShaderModule>;
53using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>;
54using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>;
55using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>;
56using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>;
57
58} // namespace Vulkan
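declarations.h existed solely to force vulkan.hpp into fully dynamic dispatch (every call routed through a DispatchLoaderDynamic) and to re-alias the unique handle types accordingly. With this commit the renderer moves to the project's own thin wrapper (note the video_core/renderer_vulkan/wrapper.h include that replaces it in maxwell_to_vk.cpp below), so the shim and its macro configuration go away. The underlying mechanism is plain function-pointer loading through the Vulkan loader; a minimal sketch using only core vulkan.h symbols (the DeviceDispatch struct is illustrative, not yuzu's wrapper):

    #include <vulkan/vulkan.h>

    struct DeviceDispatch {
        PFN_vkCreateSampler vkCreateSampler = nullptr;
        PFN_vkDestroySampler vkDestroySampler = nullptr;
    };

    // Resolve entry points once; later calls go through the table instead of
    // the static symbols exported by the loader library.
    void Load(VkInstance instance, DeviceDispatch& dld) {
        dld.vkCreateSampler = reinterpret_cast<PFN_vkCreateSampler>(
            vkGetInstanceProcAddr(instance, "vkCreateSampler"));
        dld.vkDestroySampler = reinterpret_cast<PFN_vkDestroySampler>(
            vkGetInstanceProcAddr(instance, "vkDestroySampler"));
    }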
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 7480cb7c3..8681b821f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -2,13 +2,15 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <iterator>
6
5#include "common/assert.h" 7#include "common/assert.h"
6#include "common/common_types.h" 8#include "common/common_types.h"
7#include "common/logging/log.h" 9#include "common/logging/log.h"
8#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h" 11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h" 12#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/wrapper.h"
12#include "video_core/surface.h" 14#include "video_core/surface.h"
13 15
14namespace Vulkan::MaxwellToVK { 16namespace Vulkan::MaxwellToVK {
@@ -17,88 +19,89 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
17 19
18namespace Sampler { 20namespace Sampler {
19 21
20vk::Filter Filter(Tegra::Texture::TextureFilter filter) { 22VkFilter Filter(Tegra::Texture::TextureFilter filter) {
21 switch (filter) { 23 switch (filter) {
22 case Tegra::Texture::TextureFilter::Linear: 24 case Tegra::Texture::TextureFilter::Linear:
23 return vk::Filter::eLinear; 25 return VK_FILTER_LINEAR;
24 case Tegra::Texture::TextureFilter::Nearest: 26 case Tegra::Texture::TextureFilter::Nearest:
25 return vk::Filter::eNearest; 27 return VK_FILTER_NEAREST;
26 } 28 }
27 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); 29 UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter));
28 return {}; 30 return {};
29} 31}
30 32
31vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { 33VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) {
32 switch (mipmap_filter) { 34 switch (mipmap_filter) {
33 case Tegra::Texture::TextureMipmapFilter::None: 35 case Tegra::Texture::TextureMipmapFilter::None:
34 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping 36 // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping
35 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to 37 // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to
36 // use an image view with a single mipmap level to emulate this. 38 // use an image view with a single mipmap level to emulate this.
37 return vk::SamplerMipmapMode::eLinear; 39 return VK_SAMPLER_MIPMAP_MODE_LINEAR;
38 case Tegra::Texture::TextureMipmapFilter::Linear: 41 case Tegra::Texture::TextureMipmapFilter::Linear:
39 return vk::SamplerMipmapMode::eLinear; 42 return VK_SAMPLER_MIPMAP_MODE_LINEAR;
40 case Tegra::Texture::TextureMipmapFilter::Nearest: 43 case Tegra::Texture::TextureMipmapFilter::Nearest:
41 return vk::SamplerMipmapMode::eNearest; 44 return VK_SAMPLER_MIPMAP_MODE_NEAREST;
42 } 45 }
43 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); 46 UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter));
44 return {}; 47 return {};
45} 48}
46 49
47vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, 50VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
48 Tegra::Texture::TextureFilter filter) { 51 Tegra::Texture::TextureFilter filter) {
49 switch (wrap_mode) { 52 switch (wrap_mode) {
50 case Tegra::Texture::WrapMode::Wrap: 53 case Tegra::Texture::WrapMode::Wrap:
51 return vk::SamplerAddressMode::eRepeat; 54 return VK_SAMPLER_ADDRESS_MODE_REPEAT;
52 case Tegra::Texture::WrapMode::Mirror: 55 case Tegra::Texture::WrapMode::Mirror:
53 return vk::SamplerAddressMode::eMirroredRepeat; 56 return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT;
54 case Tegra::Texture::WrapMode::ClampToEdge: 57 case Tegra::Texture::WrapMode::ClampToEdge:
55 return vk::SamplerAddressMode::eClampToEdge; 58 return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
56 case Tegra::Texture::WrapMode::Border: 59 case Tegra::Texture::WrapMode::Border:
57 return vk::SamplerAddressMode::eClampToBorder; 60 return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
58 case Tegra::Texture::WrapMode::Clamp: 61 case Tegra::Texture::WrapMode::Clamp:
59 if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { 62 if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
60 // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this 63 // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this
61 // by sending an invalid enumeration. 64 // by sending an invalid enumeration.
62 return static_cast<vk::SamplerAddressMode>(0xcafe); 65 return static_cast<VkSamplerAddressMode>(0xcafe);
63 } 66 }
64 // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors 67 // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors
65 switch (filter) { 68 switch (filter) {
66 case Tegra::Texture::TextureFilter::Nearest: 69 case Tegra::Texture::TextureFilter::Nearest:
67 return vk::SamplerAddressMode::eClampToEdge; 70 return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
68 case Tegra::Texture::TextureFilter::Linear: 71 case Tegra::Texture::TextureFilter::Linear:
69 return vk::SamplerAddressMode::eClampToBorder; 72 return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
70 } 73 }
71 UNREACHABLE(); 74 UNREACHABLE();
72 return vk::SamplerAddressMode::eClampToEdge; 75 return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
73 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: 76 case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
74 return vk::SamplerAddressMode::eMirrorClampToEdge; 77 return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
75 case Tegra::Texture::WrapMode::MirrorOnceBorder: 78 case Tegra::Texture::WrapMode::MirrorOnceBorder:
76 UNIMPLEMENTED(); 79 UNIMPLEMENTED();
77 return vk::SamplerAddressMode::eMirrorClampToEdge; 80 return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
78 default: 81 default:
79 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); 82 UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
80 return {}; 83 return {};
81 } 84 }
82} 85}
83 86
84vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { 87VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
85 switch (depth_compare_func) { 88 switch (depth_compare_func) {
86 case Tegra::Texture::DepthCompareFunc::Never: 89 case Tegra::Texture::DepthCompareFunc::Never:
87 return vk::CompareOp::eNever; 90 return VK_COMPARE_OP_NEVER;
88 case Tegra::Texture::DepthCompareFunc::Less: 91 case Tegra::Texture::DepthCompareFunc::Less:
89 return vk::CompareOp::eLess; 92 return VK_COMPARE_OP_LESS;
90 case Tegra::Texture::DepthCompareFunc::LessEqual: 93 case Tegra::Texture::DepthCompareFunc::LessEqual:
91 return vk::CompareOp::eLessOrEqual; 94 return VK_COMPARE_OP_LESS_OR_EQUAL;
92 case Tegra::Texture::DepthCompareFunc::Equal: 95 case Tegra::Texture::DepthCompareFunc::Equal:
93 return vk::CompareOp::eEqual; 96 return VK_COMPARE_OP_EQUAL;
94 case Tegra::Texture::DepthCompareFunc::NotEqual: 97 case Tegra::Texture::DepthCompareFunc::NotEqual:
95 return vk::CompareOp::eNotEqual; 98 return VK_COMPARE_OP_NOT_EQUAL;
96 case Tegra::Texture::DepthCompareFunc::Greater: 99 case Tegra::Texture::DepthCompareFunc::Greater:
97 return vk::CompareOp::eGreater; 100 return VK_COMPARE_OP_GREATER;
98 case Tegra::Texture::DepthCompareFunc::GreaterEqual: 101 case Tegra::Texture::DepthCompareFunc::GreaterEqual:
99 return vk::CompareOp::eGreaterOrEqual; 102 return VK_COMPARE_OP_GREATER_OR_EQUAL;
100 case Tegra::Texture::DepthCompareFunc::Always: 103 case Tegra::Texture::DepthCompareFunc::Always:
101 return vk::CompareOp::eAlways; 104 return VK_COMPARE_OP_ALWAYS;
102 } 105 }
103 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", 106 UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}",
104 static_cast<u32>(depth_compare_func)); 107 static_cast<u32>(depth_compare_func));
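The Sampler helpers above are a mechanical vulkan.hpp-to-C translation: each scoped enumerator is defined as the C enum with the same numeric value, so driver-visible behavior is unchanged, including the deliberately out-of-range static_cast that exploits Nvidia's GL_CLAMP fallback. The equivalence can be spot-checked against plain vulkan.h:

    #include <vulkan/vulkan.h>

    // vk::Filter::eNearest/eLinear are defined as exactly these C values, so
    // swapping the spellings cannot change what the driver receives.
    static_assert(VK_FILTER_NEAREST == 0);
    static_assert(VK_FILTER_LINEAR == 1);
    static_assert(VK_COMPARE_OP_NEVER == 0 && VK_COMPARE_OP_ALWAYS == 7);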
@@ -112,92 +115,92 @@ namespace {
112enum : u32 { Attachable = 1, Storage = 2 }; 115enum : u32 { Attachable = 1, Storage = 2 };
113 116
114struct FormatTuple { 117struct FormatTuple {
115 vk::Format format; ///< Vulkan format 118 VkFormat format; ///< Vulkan format
116 int usage; ///< Describes image format usage 119 int usage = 0; ///< Describes image format usage
117} constexpr tex_format_tuples[] = { 120} constexpr tex_format_tuples[] = {
118 {vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage}, // ABGR8U 121 {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U
119 {vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage}, // ABGR8S 122 {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S
120 {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI 123 {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI
121 {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U 124 {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U
122 {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U 125 {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U
123 {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle) 126 {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle)
124 {vk::Format::eR8Unorm, Attachable | Storage}, // R8U 127 {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U
125 {vk::Format::eR8Uint, Attachable | Storage}, // R8UI 128 {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI
126 {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F 129 {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F
127 {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U 130 {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U
128 {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S 131 {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S
129 {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI 132 {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI
130 {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F 133 {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F
131 {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI 134 {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI
132 {vk::Format::eBc1RgbaUnormBlock, {}}, // DXT1 135 {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1
133 {vk::Format::eBc2UnormBlock, {}}, // DXT23 136 {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23
134 {vk::Format::eBc3UnormBlock, {}}, // DXT45 137 {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45
135 {vk::Format::eBc4UnormBlock, {}}, // DXN1 138 {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1
136 {vk::Format::eBc5UnormBlock, {}}, // DXN2UNORM 139 {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM
137 {vk::Format::eBc5SnormBlock, {}}, // DXN2SNORM 140 {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM
138 {vk::Format::eBc7UnormBlock, {}}, // BC7U 141 {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U
139 {vk::Format::eBc6HUfloatBlock, {}}, // BC6H_UF16 142 {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16
140 {vk::Format::eBc6HSfloatBlock, {}}, // BC6H_SF16 143 {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16
141 {vk::Format::eAstc4x4UnormBlock, {}}, // ASTC_2D_4X4 144 {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4
142 {vk::Format::eB8G8R8A8Unorm, {}}, // BGRA8 145 {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8
143 {vk::Format::eR32G32B32A32Sfloat, Attachable | Storage}, // RGBA32F 146 {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F
144 {vk::Format::eR32G32Sfloat, Attachable | Storage}, // RG32F 147 {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F
145 {vk::Format::eR32Sfloat, Attachable | Storage}, // R32F 148 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F
146 {vk::Format::eR16Sfloat, Attachable | Storage}, // R16F 149 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
147 {vk::Format::eR16Unorm, Attachable | Storage}, // R16U 150 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
148 {vk::Format::eUndefined, {}}, // R16S 151 {VK_FORMAT_UNDEFINED}, // R16S
149 {vk::Format::eUndefined, {}}, // R16UI 152 {VK_FORMAT_UNDEFINED}, // R16UI
150 {vk::Format::eUndefined, {}}, // R16I 153 {VK_FORMAT_UNDEFINED}, // R16I
151 {vk::Format::eR16G16Unorm, Attachable | Storage}, // RG16 154 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
152 {vk::Format::eR16G16Sfloat, Attachable | Storage}, // RG16F 155 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
153 {vk::Format::eUndefined, {}}, // RG16UI 156 {VK_FORMAT_UNDEFINED}, // RG16UI
154 {vk::Format::eUndefined, {}}, // RG16I 157 {VK_FORMAT_UNDEFINED}, // RG16I
155 {vk::Format::eR16G16Snorm, Attachable | Storage}, // RG16S 158 {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S
156 {vk::Format::eUndefined, {}}, // RGB32F 159 {VK_FORMAT_UNDEFINED}, // RGB32F
157 {vk::Format::eR8G8B8A8Srgb, Attachable}, // RGBA8_SRGB 160 {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB
158 {vk::Format::eR8G8Unorm, Attachable | Storage}, // RG8U 161 {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U
159 {vk::Format::eR8G8Snorm, Attachable | Storage}, // RG8S 162 {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S
160 {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI 163 {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI
161 {vk::Format::eUndefined, {}}, // RGBX16F 164 {VK_FORMAT_UNDEFINED}, // RGBX16F
162 {vk::Format::eR32Uint, Attachable | Storage}, // R32UI 165 {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI
163 {vk::Format::eR32Sint, Attachable | Storage}, // R32I 166 {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I
164 {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 167 {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8
165 {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 168 {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5
166 {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 169 {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4
167 {vk::Format::eUndefined, {}}, // BGRA8_SRGB 170 {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB
168 {vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB 171 {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB
169 {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB 172 {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB
170 {vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB 173 {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB
171 {vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB 174 {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB
172 {vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U 175 {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U
173 {vk::Format::eAstc4x4SrgbBlock, {}}, // ASTC_2D_4X4_SRGB 176 {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB
174 {vk::Format::eAstc8x8SrgbBlock, {}}, // ASTC_2D_8X8_SRGB 177 {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB
175 {vk::Format::eAstc8x5SrgbBlock, {}}, // ASTC_2D_8X5_SRGB 178 {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB
176 {vk::Format::eAstc5x4SrgbBlock, {}}, // ASTC_2D_5X4_SRGB 179 {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB
177 {vk::Format::eAstc5x5UnormBlock, {}}, // ASTC_2D_5X5 180 {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5
178 {vk::Format::eAstc5x5SrgbBlock, {}}, // ASTC_2D_5X5_SRGB 181 {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB
179 {vk::Format::eAstc10x8UnormBlock, {}}, // ASTC_2D_10X8 182 {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8
180 {vk::Format::eAstc10x8SrgbBlock, {}}, // ASTC_2D_10X8_SRGB 183 {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB
181 {vk::Format::eAstc6x6UnormBlock, {}}, // ASTC_2D_6X6 184 {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6
182 {vk::Format::eAstc6x6SrgbBlock, {}}, // ASTC_2D_6X6_SRGB 185 {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB
183 {vk::Format::eAstc10x10UnormBlock, {}}, // ASTC_2D_10X10 186 {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10
184 {vk::Format::eAstc10x10SrgbBlock, {}}, // ASTC_2D_10X10_SRGB 187 {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB
185 {vk::Format::eAstc12x12UnormBlock, {}}, // ASTC_2D_12X12 188 {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12
186 {vk::Format::eAstc12x12SrgbBlock, {}}, // ASTC_2D_12X12_SRGB 189 {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB
187 {vk::Format::eAstc8x6UnormBlock, {}}, // ASTC_2D_8X6 190 {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6
188 {vk::Format::eAstc8x6SrgbBlock, {}}, // ASTC_2D_8X6_SRGB 191 {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB
189 {vk::Format::eAstc6x5UnormBlock, {}}, // ASTC_2D_6X5 192 {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5
190 {vk::Format::eAstc6x5SrgbBlock, {}}, // ASTC_2D_6X5_SRGB 193 {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB
191 {vk::Format::eE5B9G9R9UfloatPack32, {}}, // E5B9G9R9F 194 {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F
192 195
193 // Depth formats 196 // Depth formats
194 {vk::Format::eD32Sfloat, Attachable}, // Z32F 197 {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F
195 {vk::Format::eD16Unorm, Attachable}, // Z16 198 {VK_FORMAT_D16_UNORM, Attachable}, // Z16
196 199
197 // DepthStencil formats 200 // DepthStencil formats
198 {vk::Format::eD24UnormS8Uint, Attachable}, // Z24S8 201 {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8
199 {vk::Format::eD24UnormS8Uint, Attachable}, // S8Z24 (emulated) 202 {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated)
200 {vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8 203 {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8
201}; 204};
202static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); 205static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat);
203 206
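Same pattern for the Vulkan format table: usage now defaults to 0, so rows without Attachable/Storage shrink to just the VkFormat, VK_FORMAT_UNDEFINED marks pixel formats with no direct Vulkan equivalent, and the static_assert keeps the array in lockstep with MaxPixelFormat. Decoding the usage bitmask is a two-test affair; a sketch with std::uint32_t standing in for the project's u32:

    #include <cstdint>

    enum : std::uint32_t { Attachable = 1, Storage = 2 }; // values from the diff

    constexpr bool IsAttachable(std::uint32_t usage) { return (usage & Attachable) != 0; }
    constexpr bool IsStorage(std::uint32_t usage) { return (usage & Storage) != 0; }

    static_assert(IsAttachable(Attachable | Storage) && IsStorage(Attachable | Storage));
    static_assert(!IsAttachable(0) && !IsStorage(0));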
@@ -212,106 +215,106 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
212 ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); 215 ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples));
213 216
214 auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; 217 auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)];
215 if (tuple.format == vk::Format::eUndefined) { 218 if (tuple.format == VK_FORMAT_UNDEFINED) {
216 UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", 219 UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}",
217 static_cast<u32>(pixel_format)); 220 static_cast<u32>(pixel_format));
218 return {vk::Format::eA8B8G8R8UnormPack32, true, true}; 221 return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true};
219 } 222 }
220 223
221 // Use ABGR8 on hardware that doesn't support ASTC natively 224 // Use ABGR8 on hardware that doesn't support ASTC natively
222 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { 225 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
223 tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) 226 tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
224 ? vk::Format::eA8B8G8R8SrgbPack32 227 ? VK_FORMAT_A8B8G8R8_SRGB_PACK32
225 : vk::Format::eA8B8G8R8UnormPack32; 228 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
226 } 229 }
227 const bool attachable = tuple.usage & Attachable; 230 const bool attachable = tuple.usage & Attachable;
228 const bool storage = tuple.usage & Storage; 231 const bool storage = tuple.usage & Storage;
229 232
230 vk::FormatFeatureFlags usage; 233 VkFormatFeatureFlags usage;
231 if (format_type == FormatType::Buffer) { 234 if (format_type == FormatType::Buffer) {
232 usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer | 235 usage =
233 vk::FormatFeatureFlagBits::eUniformTexelBuffer; 236 VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
234 } else { 237 } else {
235 usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst | 238 usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
236 vk::FormatFeatureFlagBits::eTransferSrc; 239 VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;
237 if (attachable) { 240 if (attachable) {
238 usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment 241 usage |= IsZetaFormat(pixel_format) ? VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT
239 : vk::FormatFeatureFlagBits::eColorAttachment; 242 : VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT;
240 } 243 }
241 if (storage) { 244 if (storage) {
242 usage |= vk::FormatFeatureFlagBits::eStorageImage; 245 usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
243 } 246 }
244 } 247 }
245 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; 248 return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
246} 249}
247 250
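SurfaceFormat first swaps ASTC formats for A8B8G8R8 (sRGB or UNORM) when the device lacks optimal ASTC support, then defers to GetSupportedFormat to validate the final format against the feature flags the usage requires. That validation ultimately rests on a standard Vulkan query; a hedged sketch of the shape of such a check (the real helper lives in vk_device.cpp, only the API calls below are standard):

    #include <vulkan/vulkan.h>

    bool SupportsOptimalTiling(VkPhysicalDevice gpu, VkFormat format,
                               VkFormatFeatureFlags wanted) {
        VkFormatProperties properties;
        vkGetPhysicalDeviceFormatProperties(gpu, format, &properties);
        return (properties.optimalTilingFeatures & wanted) == wanted;
    }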
248vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { 251VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) {
249 switch (stage) { 252 switch (stage) {
250 case Tegra::Engines::ShaderType::Vertex: 253 case Tegra::Engines::ShaderType::Vertex:
251 return vk::ShaderStageFlagBits::eVertex; 254 return VK_SHADER_STAGE_VERTEX_BIT;
252 case Tegra::Engines::ShaderType::TesselationControl: 255 case Tegra::Engines::ShaderType::TesselationControl:
253 return vk::ShaderStageFlagBits::eTessellationControl; 256 return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
254 case Tegra::Engines::ShaderType::TesselationEval: 257 case Tegra::Engines::ShaderType::TesselationEval:
255 return vk::ShaderStageFlagBits::eTessellationEvaluation; 258 return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
256 case Tegra::Engines::ShaderType::Geometry: 259 case Tegra::Engines::ShaderType::Geometry:
257 return vk::ShaderStageFlagBits::eGeometry; 260 return VK_SHADER_STAGE_GEOMETRY_BIT;
258 case Tegra::Engines::ShaderType::Fragment: 261 case Tegra::Engines::ShaderType::Fragment:
259 return vk::ShaderStageFlagBits::eFragment; 262 return VK_SHADER_STAGE_FRAGMENT_BIT;
260 case Tegra::Engines::ShaderType::Compute: 263 case Tegra::Engines::ShaderType::Compute:
261 return vk::ShaderStageFlagBits::eCompute; 264 return VK_SHADER_STAGE_COMPUTE_BIT;
262 } 265 }
263 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); 266 UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage));
264 return {}; 267 return {};
265} 268}
266 269
267vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, 270VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
268 Maxwell::PrimitiveTopology topology) { 271 Maxwell::PrimitiveTopology topology) {
269 switch (topology) { 272 switch (topology) {
270 case Maxwell::PrimitiveTopology::Points: 273 case Maxwell::PrimitiveTopology::Points:
271 return vk::PrimitiveTopology::ePointList; 274 return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
272 case Maxwell::PrimitiveTopology::Lines: 275 case Maxwell::PrimitiveTopology::Lines:
273 return vk::PrimitiveTopology::eLineList; 276 return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
274 case Maxwell::PrimitiveTopology::LineStrip: 277 case Maxwell::PrimitiveTopology::LineStrip:
275 return vk::PrimitiveTopology::eLineStrip; 278 return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
276 case Maxwell::PrimitiveTopology::Triangles: 279 case Maxwell::PrimitiveTopology::Triangles:
277 return vk::PrimitiveTopology::eTriangleList; 280 return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
278 case Maxwell::PrimitiveTopology::TriangleStrip: 281 case Maxwell::PrimitiveTopology::TriangleStrip:
279 return vk::PrimitiveTopology::eTriangleStrip; 282 return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
280 case Maxwell::PrimitiveTopology::TriangleFan: 283 case Maxwell::PrimitiveTopology::TriangleFan:
281 return vk::PrimitiveTopology::eTriangleFan; 284 return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
282 case Maxwell::PrimitiveTopology::Quads: 285 case Maxwell::PrimitiveTopology::Quads:
283 // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases 286 // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases
284 return vk::PrimitiveTopology::eTriangleList; 287 return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
285 case Maxwell::PrimitiveTopology::Patches: 288 case Maxwell::PrimitiveTopology::Patches:
286 return vk::PrimitiveTopology::ePatchList; 289 return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
287 default: 290 default:
288 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); 291 UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
289 return {}; 292 return {};
290 } 293 }
291} 294}
292 295
293vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { 296VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
294 switch (type) { 297 switch (type) {
295 case Maxwell::VertexAttribute::Type::SignedNorm: 298 case Maxwell::VertexAttribute::Type::SignedNorm:
296 switch (size) { 299 switch (size) {
297 case Maxwell::VertexAttribute::Size::Size_8: 300 case Maxwell::VertexAttribute::Size::Size_8:
298 return vk::Format::eR8Snorm; 301 return VK_FORMAT_R8_SNORM;
299 case Maxwell::VertexAttribute::Size::Size_8_8: 302 case Maxwell::VertexAttribute::Size::Size_8_8:
300 return vk::Format::eR8G8Snorm; 303 return VK_FORMAT_R8G8_SNORM;
301 case Maxwell::VertexAttribute::Size::Size_8_8_8: 304 case Maxwell::VertexAttribute::Size::Size_8_8_8:
302 return vk::Format::eR8G8B8Snorm; 305 return VK_FORMAT_R8G8B8_SNORM;
303 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 306 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
304 return vk::Format::eR8G8B8A8Snorm; 307 return VK_FORMAT_R8G8B8A8_SNORM;
305 case Maxwell::VertexAttribute::Size::Size_16: 308 case Maxwell::VertexAttribute::Size::Size_16:
306 return vk::Format::eR16Snorm; 309 return VK_FORMAT_R16_SNORM;
307 case Maxwell::VertexAttribute::Size::Size_16_16: 310 case Maxwell::VertexAttribute::Size::Size_16_16:
308 return vk::Format::eR16G16Snorm; 311 return VK_FORMAT_R16G16_SNORM;
309 case Maxwell::VertexAttribute::Size::Size_16_16_16: 312 case Maxwell::VertexAttribute::Size::Size_16_16_16:
310 return vk::Format::eR16G16B16Snorm; 313 return VK_FORMAT_R16G16B16_SNORM;
311 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 314 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
312 return vk::Format::eR16G16B16A16Snorm; 315 return VK_FORMAT_R16G16B16A16_SNORM;
313 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 316 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
314 return vk::Format::eA2B10G10R10SnormPack32; 317 return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
315 default: 318 default:
316 break; 319 break;
317 } 320 }
@@ -319,23 +322,23 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
319 case Maxwell::VertexAttribute::Type::UnsignedNorm: 322 case Maxwell::VertexAttribute::Type::UnsignedNorm:
320 switch (size) { 323 switch (size) {
321 case Maxwell::VertexAttribute::Size::Size_8: 324 case Maxwell::VertexAttribute::Size::Size_8:
322 return vk::Format::eR8Unorm; 325 return VK_FORMAT_R8_UNORM;
323 case Maxwell::VertexAttribute::Size::Size_8_8: 326 case Maxwell::VertexAttribute::Size::Size_8_8:
324 return vk::Format::eR8G8Unorm; 327 return VK_FORMAT_R8G8_UNORM;
325 case Maxwell::VertexAttribute::Size::Size_8_8_8: 328 case Maxwell::VertexAttribute::Size::Size_8_8_8:
326 return vk::Format::eR8G8B8Unorm; 329 return VK_FORMAT_R8G8B8_UNORM;
327 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 330 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
328 return vk::Format::eR8G8B8A8Unorm; 331 return VK_FORMAT_R8G8B8A8_UNORM;
329 case Maxwell::VertexAttribute::Size::Size_16: 332 case Maxwell::VertexAttribute::Size::Size_16:
330 return vk::Format::eR16Unorm; 333 return VK_FORMAT_R16_UNORM;
331 case Maxwell::VertexAttribute::Size::Size_16_16: 334 case Maxwell::VertexAttribute::Size::Size_16_16:
332 return vk::Format::eR16G16Unorm; 335 return VK_FORMAT_R16G16_UNORM;
333 case Maxwell::VertexAttribute::Size::Size_16_16_16: 336 case Maxwell::VertexAttribute::Size::Size_16_16_16:
334 return vk::Format::eR16G16B16Unorm; 337 return VK_FORMAT_R16G16B16_UNORM;
335 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 338 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
336 return vk::Format::eR16G16B16A16Unorm; 339 return VK_FORMAT_R16G16B16A16_UNORM;
337 case Maxwell::VertexAttribute::Size::Size_10_10_10_2: 340 case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
338 return vk::Format::eA2B10G10R10UnormPack32; 341 return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
339 default: 342 default:
340 break; 343 break;
341 } 344 }
@@ -343,59 +346,69 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
343 case Maxwell::VertexAttribute::Type::SignedInt: 346 case Maxwell::VertexAttribute::Type::SignedInt:
344 switch (size) { 347 switch (size) {
345 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 348 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
346 return vk::Format::eR16G16B16A16Sint; 349 return VK_FORMAT_R16G16B16A16_SINT;
347 case Maxwell::VertexAttribute::Size::Size_8: 350 case Maxwell::VertexAttribute::Size::Size_8:
348 return vk::Format::eR8Sint; 351 return VK_FORMAT_R8_SINT;
349 case Maxwell::VertexAttribute::Size::Size_8_8: 352 case Maxwell::VertexAttribute::Size::Size_8_8:
350 return vk::Format::eR8G8Sint; 353 return VK_FORMAT_R8G8_SINT;
351 case Maxwell::VertexAttribute::Size::Size_8_8_8: 354 case Maxwell::VertexAttribute::Size::Size_8_8_8:
352 return vk::Format::eR8G8B8Sint; 355 return VK_FORMAT_R8G8B8_SINT;
353 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 356 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
354 return vk::Format::eR8G8B8A8Sint; 357 return VK_FORMAT_R8G8B8A8_SINT;
355 case Maxwell::VertexAttribute::Size::Size_32: 358 case Maxwell::VertexAttribute::Size::Size_32:
356 return vk::Format::eR32Sint; 359 return VK_FORMAT_R32_SINT;
357 default: 360 default:
358 break; 361 break;
359 } 362 }
363 break;
360 case Maxwell::VertexAttribute::Type::UnsignedInt: 364 case Maxwell::VertexAttribute::Type::UnsignedInt:
361 switch (size) { 365 switch (size) {
362 case Maxwell::VertexAttribute::Size::Size_8: 366 case Maxwell::VertexAttribute::Size::Size_8:
363 return vk::Format::eR8Uint; 367 return VK_FORMAT_R8_UINT;
364 case Maxwell::VertexAttribute::Size::Size_8_8: 368 case Maxwell::VertexAttribute::Size::Size_8_8:
365 return vk::Format::eR8G8Uint; 369 return VK_FORMAT_R8G8_UINT;
366 case Maxwell::VertexAttribute::Size::Size_8_8_8: 370 case Maxwell::VertexAttribute::Size::Size_8_8_8:
367 return vk::Format::eR8G8B8Uint; 371 return VK_FORMAT_R8G8B8_UINT;
368 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 372 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
369 return vk::Format::eR8G8B8A8Uint; 373 return VK_FORMAT_R8G8B8A8_UINT;
374 case Maxwell::VertexAttribute::Size::Size_16:
375 return VK_FORMAT_R16_UINT;
376 case Maxwell::VertexAttribute::Size::Size_16_16:
377 return VK_FORMAT_R16G16_UINT;
378 case Maxwell::VertexAttribute::Size::Size_16_16_16:
379 return VK_FORMAT_R16G16B16_UINT;
380 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
381 return VK_FORMAT_R16G16B16A16_UINT;
370 case Maxwell::VertexAttribute::Size::Size_32: 382 case Maxwell::VertexAttribute::Size::Size_32:
371 return vk::Format::eR32Uint; 383 return VK_FORMAT_R32_UINT;
372 case Maxwell::VertexAttribute::Size::Size_32_32: 384 case Maxwell::VertexAttribute::Size::Size_32_32:
373 return vk::Format::eR32G32Uint; 385 return VK_FORMAT_R32G32_UINT;
374 case Maxwell::VertexAttribute::Size::Size_32_32_32: 386 case Maxwell::VertexAttribute::Size::Size_32_32_32:
375 return vk::Format::eR32G32B32Uint; 387 return VK_FORMAT_R32G32B32_UINT;
376 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 388 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
377 return vk::Format::eR32G32B32A32Uint; 389 return VK_FORMAT_R32G32B32A32_UINT;
378 default: 390 default:
379 break; 391 break;
380 } 392 }
393 break;
381 case Maxwell::VertexAttribute::Type::UnsignedScaled: 394 case Maxwell::VertexAttribute::Type::UnsignedScaled:
382 switch (size) { 395 switch (size) {
383 case Maxwell::VertexAttribute::Size::Size_8: 396 case Maxwell::VertexAttribute::Size::Size_8:
384 return vk::Format::eR8Uscaled; 397 return VK_FORMAT_R8_USCALED;
385 case Maxwell::VertexAttribute::Size::Size_8_8: 398 case Maxwell::VertexAttribute::Size::Size_8_8:
386 return vk::Format::eR8G8Uscaled; 399 return VK_FORMAT_R8G8_USCALED;
387 case Maxwell::VertexAttribute::Size::Size_8_8_8: 400 case Maxwell::VertexAttribute::Size::Size_8_8_8:
388 return vk::Format::eR8G8B8Uscaled; 401 return VK_FORMAT_R8G8B8_USCALED;
389 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 402 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
390 return vk::Format::eR8G8B8A8Uscaled; 403 return VK_FORMAT_R8G8B8A8_USCALED;
391 case Maxwell::VertexAttribute::Size::Size_16: 404 case Maxwell::VertexAttribute::Size::Size_16:
392 return vk::Format::eR16Uscaled; 405 return VK_FORMAT_R16_USCALED;
393 case Maxwell::VertexAttribute::Size::Size_16_16: 406 case Maxwell::VertexAttribute::Size::Size_16_16:
394 return vk::Format::eR16G16Uscaled; 407 return VK_FORMAT_R16G16_USCALED;
395 case Maxwell::VertexAttribute::Size::Size_16_16_16: 408 case Maxwell::VertexAttribute::Size::Size_16_16_16:
396 return vk::Format::eR16G16B16Uscaled; 409 return VK_FORMAT_R16G16B16_USCALED;
397 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 410 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
398 return vk::Format::eR16G16B16A16Uscaled; 411 return VK_FORMAT_R16G16B16A16_USCALED;
399 default: 412 default:
400 break; 413 break;
401 } 414 }
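Beyond the enum translation, this stretch of VertexFormat fixes real bugs: the SignedInt and UnsignedInt cases previously fell through into the following case whenever the inner switch did not return (hence the two added break statements), and the 16-bit unsigned-integer sizes were missing entirely. A minimal self-contained illustration of the fallthrough those breaks prevent (Type and Size are stand-ins, not the Maxwell enums):

    enum class Type { SignedInt, UnsignedInt };
    enum class Size { Size_8, Size_32 };

    int VertexFormatCode(Type type, Size size) {
        switch (type) {
        case Type::SignedInt:
            switch (size) {
            case Size::Size_8:
                return 1;
            default:
                break;
            }
            break; // without this, an unhandled size falls into UnsignedInt
        case Type::UnsignedInt:
            switch (size) {
            case Size::Size_32:
                return 2;
            default:
                break;
            }
            break;
        }
        return 0; // unknown combination
    }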
@@ -403,21 +416,21 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
403 case Maxwell::VertexAttribute::Type::SignedScaled: 416 case Maxwell::VertexAttribute::Type::SignedScaled:
404 switch (size) { 417 switch (size) {
405 case Maxwell::VertexAttribute::Size::Size_8: 418 case Maxwell::VertexAttribute::Size::Size_8:
406 return vk::Format::eR8Sscaled; 419 return VK_FORMAT_R8_SSCALED;
407 case Maxwell::VertexAttribute::Size::Size_8_8: 420 case Maxwell::VertexAttribute::Size::Size_8_8:
408 return vk::Format::eR8G8Sscaled; 421 return VK_FORMAT_R8G8_SSCALED;
409 case Maxwell::VertexAttribute::Size::Size_8_8_8: 422 case Maxwell::VertexAttribute::Size::Size_8_8_8:
410 return vk::Format::eR8G8B8Sscaled; 423 return VK_FORMAT_R8G8B8_SSCALED;
411 case Maxwell::VertexAttribute::Size::Size_8_8_8_8: 424 case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
412 return vk::Format::eR8G8B8A8Sscaled; 425 return VK_FORMAT_R8G8B8A8_SSCALED;
413 case Maxwell::VertexAttribute::Size::Size_16: 426 case Maxwell::VertexAttribute::Size::Size_16:
414 return vk::Format::eR16Sscaled; 427 return VK_FORMAT_R16_SSCALED;
415 case Maxwell::VertexAttribute::Size::Size_16_16: 428 case Maxwell::VertexAttribute::Size::Size_16_16:
416 return vk::Format::eR16G16Sscaled; 429 return VK_FORMAT_R16G16_SSCALED;
417 case Maxwell::VertexAttribute::Size::Size_16_16_16: 430 case Maxwell::VertexAttribute::Size::Size_16_16_16:
418 return vk::Format::eR16G16B16Sscaled; 431 return VK_FORMAT_R16G16B16_SSCALED;
419 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 432 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
420 return vk::Format::eR16G16B16A16Sscaled; 433 return VK_FORMAT_R16G16B16A16_SSCALED;
421 default: 434 default:
422 break; 435 break;
423 } 436 }
@@ -425,21 +438,21 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
425 case Maxwell::VertexAttribute::Type::Float: 438 case Maxwell::VertexAttribute::Type::Float:
426 switch (size) { 439 switch (size) {
427 case Maxwell::VertexAttribute::Size::Size_32: 440 case Maxwell::VertexAttribute::Size::Size_32:
428 return vk::Format::eR32Sfloat; 441 return VK_FORMAT_R32_SFLOAT;
429 case Maxwell::VertexAttribute::Size::Size_32_32: 442 case Maxwell::VertexAttribute::Size::Size_32_32:
430 return vk::Format::eR32G32Sfloat; 443 return VK_FORMAT_R32G32_SFLOAT;
431 case Maxwell::VertexAttribute::Size::Size_32_32_32: 444 case Maxwell::VertexAttribute::Size::Size_32_32_32:
432 return vk::Format::eR32G32B32Sfloat; 445 return VK_FORMAT_R32G32B32_SFLOAT;
433 case Maxwell::VertexAttribute::Size::Size_32_32_32_32: 446 case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
434 return vk::Format::eR32G32B32A32Sfloat; 447 return VK_FORMAT_R32G32B32A32_SFLOAT;
435 case Maxwell::VertexAttribute::Size::Size_16: 448 case Maxwell::VertexAttribute::Size::Size_16:
436 return vk::Format::eR16Sfloat; 449 return VK_FORMAT_R16_SFLOAT;
437 case Maxwell::VertexAttribute::Size::Size_16_16: 450 case Maxwell::VertexAttribute::Size::Size_16_16:
438 return vk::Format::eR16G16Sfloat; 451 return VK_FORMAT_R16G16_SFLOAT;
439 case Maxwell::VertexAttribute::Size::Size_16_16_16: 452 case Maxwell::VertexAttribute::Size::Size_16_16_16:
440 return vk::Format::eR16G16B16Sfloat; 453 return VK_FORMAT_R16G16B16_SFLOAT;
441 case Maxwell::VertexAttribute::Size::Size_16_16_16_16: 454 case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
442 return vk::Format::eR16G16B16A16Sfloat; 455 return VK_FORMAT_R16G16B16A16_SFLOAT;
443 default: 456 default:
444 break; 457 break;
445 } 458 }
@@ -450,210 +463,210 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
450 return {}; 463 return {};
451} 464}
452 465
453vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { 466VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) {
454 switch (comparison) { 467 switch (comparison) {
455 case Maxwell::ComparisonOp::Never: 468 case Maxwell::ComparisonOp::Never:
456 case Maxwell::ComparisonOp::NeverOld: 469 case Maxwell::ComparisonOp::NeverOld:
457 return vk::CompareOp::eNever; 470 return VK_COMPARE_OP_NEVER;
458 case Maxwell::ComparisonOp::Less: 471 case Maxwell::ComparisonOp::Less:
459 case Maxwell::ComparisonOp::LessOld: 472 case Maxwell::ComparisonOp::LessOld:
460 return vk::CompareOp::eLess; 473 return VK_COMPARE_OP_LESS;
461 case Maxwell::ComparisonOp::Equal: 474 case Maxwell::ComparisonOp::Equal:
462 case Maxwell::ComparisonOp::EqualOld: 475 case Maxwell::ComparisonOp::EqualOld:
463 return vk::CompareOp::eEqual; 476 return VK_COMPARE_OP_EQUAL;
464 case Maxwell::ComparisonOp::LessEqual: 477 case Maxwell::ComparisonOp::LessEqual:
465 case Maxwell::ComparisonOp::LessEqualOld: 478 case Maxwell::ComparisonOp::LessEqualOld:
466 return vk::CompareOp::eLessOrEqual; 479 return VK_COMPARE_OP_LESS_OR_EQUAL;
467 case Maxwell::ComparisonOp::Greater: 480 case Maxwell::ComparisonOp::Greater:
468 case Maxwell::ComparisonOp::GreaterOld: 481 case Maxwell::ComparisonOp::GreaterOld:
469 return vk::CompareOp::eGreater; 482 return VK_COMPARE_OP_GREATER;
470 case Maxwell::ComparisonOp::NotEqual: 483 case Maxwell::ComparisonOp::NotEqual:
471 case Maxwell::ComparisonOp::NotEqualOld: 484 case Maxwell::ComparisonOp::NotEqualOld:
472 return vk::CompareOp::eNotEqual; 485 return VK_COMPARE_OP_NOT_EQUAL;
473 case Maxwell::ComparisonOp::GreaterEqual: 486 case Maxwell::ComparisonOp::GreaterEqual:
474 case Maxwell::ComparisonOp::GreaterEqualOld: 487 case Maxwell::ComparisonOp::GreaterEqualOld:
475 return vk::CompareOp::eGreaterOrEqual; 488 return VK_COMPARE_OP_GREATER_OR_EQUAL;
476 case Maxwell::ComparisonOp::Always: 489 case Maxwell::ComparisonOp::Always:
477 case Maxwell::ComparisonOp::AlwaysOld: 490 case Maxwell::ComparisonOp::AlwaysOld:
478 return vk::CompareOp::eAlways; 491 return VK_COMPARE_OP_ALWAYS;
479 } 492 }
480 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); 493 UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison));
481 return {}; 494 return {};
482} 495}
483 496
484vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { 497VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) {
485 switch (index_format) { 498 switch (index_format) {
486 case Maxwell::IndexFormat::UnsignedByte: 499 case Maxwell::IndexFormat::UnsignedByte:
487 if (!device.IsExtIndexTypeUint8Supported()) { 500 if (!device.IsExtIndexTypeUint8Supported()) {
488 UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); 501 UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device");
489 return vk::IndexType::eUint16; 502 return VK_INDEX_TYPE_UINT16;
490 } 503 }
491 return vk::IndexType::eUint8EXT; 504 return VK_INDEX_TYPE_UINT8_EXT;
492 case Maxwell::IndexFormat::UnsignedShort: 505 case Maxwell::IndexFormat::UnsignedShort:
493 return vk::IndexType::eUint16; 506 return VK_INDEX_TYPE_UINT16;
494 case Maxwell::IndexFormat::UnsignedInt: 507 case Maxwell::IndexFormat::UnsignedInt:
495 return vk::IndexType::eUint32; 508 return VK_INDEX_TYPE_UINT32;
496 } 509 }
497 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); 510 UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format));
498 return {}; 511 return {};
499} 512}
500 513
501vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { 514VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) {
502 switch (stencil_op) { 515 switch (stencil_op) {
503 case Maxwell::StencilOp::Keep: 516 case Maxwell::StencilOp::Keep:
504 case Maxwell::StencilOp::KeepOGL: 517 case Maxwell::StencilOp::KeepOGL:
505 return vk::StencilOp::eKeep; 518 return VK_STENCIL_OP_KEEP;
506 case Maxwell::StencilOp::Zero: 519 case Maxwell::StencilOp::Zero:
507 case Maxwell::StencilOp::ZeroOGL: 520 case Maxwell::StencilOp::ZeroOGL:
508 return vk::StencilOp::eZero; 521 return VK_STENCIL_OP_ZERO;
509 case Maxwell::StencilOp::Replace: 522 case Maxwell::StencilOp::Replace:
510 case Maxwell::StencilOp::ReplaceOGL: 523 case Maxwell::StencilOp::ReplaceOGL:
511 return vk::StencilOp::eReplace; 524 return VK_STENCIL_OP_REPLACE;
512 case Maxwell::StencilOp::Incr: 525 case Maxwell::StencilOp::Incr:
513 case Maxwell::StencilOp::IncrOGL: 526 case Maxwell::StencilOp::IncrOGL:
514 return vk::StencilOp::eIncrementAndClamp; 527 return VK_STENCIL_OP_INCREMENT_AND_CLAMP;
515 case Maxwell::StencilOp::Decr: 528 case Maxwell::StencilOp::Decr:
516 case Maxwell::StencilOp::DecrOGL: 529 case Maxwell::StencilOp::DecrOGL:
517 return vk::StencilOp::eDecrementAndClamp; 530 return VK_STENCIL_OP_DECREMENT_AND_CLAMP;
518 case Maxwell::StencilOp::Invert: 531 case Maxwell::StencilOp::Invert:
519 case Maxwell::StencilOp::InvertOGL: 532 case Maxwell::StencilOp::InvertOGL:
520 return vk::StencilOp::eInvert; 533 return VK_STENCIL_OP_INVERT;
521 case Maxwell::StencilOp::IncrWrap: 534 case Maxwell::StencilOp::IncrWrap:
522 case Maxwell::StencilOp::IncrWrapOGL: 535 case Maxwell::StencilOp::IncrWrapOGL:
523 return vk::StencilOp::eIncrementAndWrap; 536 return VK_STENCIL_OP_INCREMENT_AND_WRAP;
524 case Maxwell::StencilOp::DecrWrap: 537 case Maxwell::StencilOp::DecrWrap:
525 case Maxwell::StencilOp::DecrWrapOGL: 538 case Maxwell::StencilOp::DecrWrapOGL:
526 return vk::StencilOp::eDecrementAndWrap; 539 return VK_STENCIL_OP_DECREMENT_AND_WRAP;
527 } 540 }
528 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); 541 UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op));
529 return {}; 542 return {};
530} 543}
531 544
532vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { 545VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) {
533 switch (equation) { 546 switch (equation) {
534 case Maxwell::Blend::Equation::Add: 547 case Maxwell::Blend::Equation::Add:
535 case Maxwell::Blend::Equation::AddGL: 548 case Maxwell::Blend::Equation::AddGL:
536 return vk::BlendOp::eAdd; 549 return VK_BLEND_OP_ADD;
537 case Maxwell::Blend::Equation::Subtract: 550 case Maxwell::Blend::Equation::Subtract:
538 case Maxwell::Blend::Equation::SubtractGL: 551 case Maxwell::Blend::Equation::SubtractGL:
539 return vk::BlendOp::eSubtract; 552 return VK_BLEND_OP_SUBTRACT;
540 case Maxwell::Blend::Equation::ReverseSubtract: 553 case Maxwell::Blend::Equation::ReverseSubtract:
541 case Maxwell::Blend::Equation::ReverseSubtractGL: 554 case Maxwell::Blend::Equation::ReverseSubtractGL:
542 return vk::BlendOp::eReverseSubtract; 555 return VK_BLEND_OP_REVERSE_SUBTRACT;
543 case Maxwell::Blend::Equation::Min: 556 case Maxwell::Blend::Equation::Min:
544 case Maxwell::Blend::Equation::MinGL: 557 case Maxwell::Blend::Equation::MinGL:
545 return vk::BlendOp::eMin; 558 return VK_BLEND_OP_MIN;
546 case Maxwell::Blend::Equation::Max: 559 case Maxwell::Blend::Equation::Max:
547 case Maxwell::Blend::Equation::MaxGL: 560 case Maxwell::Blend::Equation::MaxGL:
548 return vk::BlendOp::eMax; 561 return VK_BLEND_OP_MAX;
549 } 562 }
550 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); 563 UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation));
551 return {}; 564 return {};
552} 565}
553 566
554vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { 567VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) {
555 switch (factor) { 568 switch (factor) {
556 case Maxwell::Blend::Factor::Zero: 569 case Maxwell::Blend::Factor::Zero:
557 case Maxwell::Blend::Factor::ZeroGL: 570 case Maxwell::Blend::Factor::ZeroGL:
558 return vk::BlendFactor::eZero; 571 return VK_BLEND_FACTOR_ZERO;
559 case Maxwell::Blend::Factor::One: 572 case Maxwell::Blend::Factor::One:
560 case Maxwell::Blend::Factor::OneGL: 573 case Maxwell::Blend::Factor::OneGL:
561 return vk::BlendFactor::eOne; 574 return VK_BLEND_FACTOR_ONE;
562 case Maxwell::Blend::Factor::SourceColor: 575 case Maxwell::Blend::Factor::SourceColor:
563 case Maxwell::Blend::Factor::SourceColorGL: 576 case Maxwell::Blend::Factor::SourceColorGL:
564 return vk::BlendFactor::eSrcColor; 577 return VK_BLEND_FACTOR_SRC_COLOR;
565 case Maxwell::Blend::Factor::OneMinusSourceColor: 578 case Maxwell::Blend::Factor::OneMinusSourceColor:
566 case Maxwell::Blend::Factor::OneMinusSourceColorGL: 579 case Maxwell::Blend::Factor::OneMinusSourceColorGL:
567 return vk::BlendFactor::eOneMinusSrcColor; 580 return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR;
568 case Maxwell::Blend::Factor::SourceAlpha: 581 case Maxwell::Blend::Factor::SourceAlpha:
569 case Maxwell::Blend::Factor::SourceAlphaGL: 582 case Maxwell::Blend::Factor::SourceAlphaGL:
570 return vk::BlendFactor::eSrcAlpha; 583 return VK_BLEND_FACTOR_SRC_ALPHA;
571 case Maxwell::Blend::Factor::OneMinusSourceAlpha: 584 case Maxwell::Blend::Factor::OneMinusSourceAlpha:
572 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: 585 case Maxwell::Blend::Factor::OneMinusSourceAlphaGL:
573 return vk::BlendFactor::eOneMinusSrcAlpha; 586 return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA;
574 case Maxwell::Blend::Factor::DestAlpha: 587 case Maxwell::Blend::Factor::DestAlpha:
575 case Maxwell::Blend::Factor::DestAlphaGL: 588 case Maxwell::Blend::Factor::DestAlphaGL:
576 return vk::BlendFactor::eDstAlpha; 589 return VK_BLEND_FACTOR_DST_ALPHA;
577 case Maxwell::Blend::Factor::OneMinusDestAlpha: 590 case Maxwell::Blend::Factor::OneMinusDestAlpha:
578 case Maxwell::Blend::Factor::OneMinusDestAlphaGL: 591 case Maxwell::Blend::Factor::OneMinusDestAlphaGL:
579 return vk::BlendFactor::eOneMinusDstAlpha; 592 return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA;
580 case Maxwell::Blend::Factor::DestColor: 593 case Maxwell::Blend::Factor::DestColor:
581 case Maxwell::Blend::Factor::DestColorGL: 594 case Maxwell::Blend::Factor::DestColorGL:
582 return vk::BlendFactor::eDstColor; 595 return VK_BLEND_FACTOR_DST_COLOR;
583 case Maxwell::Blend::Factor::OneMinusDestColor: 596 case Maxwell::Blend::Factor::OneMinusDestColor:
584 case Maxwell::Blend::Factor::OneMinusDestColorGL: 597 case Maxwell::Blend::Factor::OneMinusDestColorGL:
585 return vk::BlendFactor::eOneMinusDstColor; 598 return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
586 case Maxwell::Blend::Factor::SourceAlphaSaturate: 599 case Maxwell::Blend::Factor::SourceAlphaSaturate:
587 case Maxwell::Blend::Factor::SourceAlphaSaturateGL: 600 case Maxwell::Blend::Factor::SourceAlphaSaturateGL:
588 return vk::BlendFactor::eSrcAlphaSaturate; 601 return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE;
589 case Maxwell::Blend::Factor::Source1Color: 602 case Maxwell::Blend::Factor::Source1Color:
590 case Maxwell::Blend::Factor::Source1ColorGL: 603 case Maxwell::Blend::Factor::Source1ColorGL:
591 return vk::BlendFactor::eSrc1Color; 604 return VK_BLEND_FACTOR_SRC1_COLOR;
592 case Maxwell::Blend::Factor::OneMinusSource1Color: 605 case Maxwell::Blend::Factor::OneMinusSource1Color:
593 case Maxwell::Blend::Factor::OneMinusSource1ColorGL: 606 case Maxwell::Blend::Factor::OneMinusSource1ColorGL:
594 return vk::BlendFactor::eOneMinusSrc1Color; 607 return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR;
595 case Maxwell::Blend::Factor::Source1Alpha: 608 case Maxwell::Blend::Factor::Source1Alpha:
596 case Maxwell::Blend::Factor::Source1AlphaGL: 609 case Maxwell::Blend::Factor::Source1AlphaGL:
597 return vk::BlendFactor::eSrc1Alpha; 610 return VK_BLEND_FACTOR_SRC1_ALPHA;
598 case Maxwell::Blend::Factor::OneMinusSource1Alpha: 611 case Maxwell::Blend::Factor::OneMinusSource1Alpha:
599 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: 612 case Maxwell::Blend::Factor::OneMinusSource1AlphaGL:
600 return vk::BlendFactor::eOneMinusSrc1Alpha; 613 return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
601 case Maxwell::Blend::Factor::ConstantColor: 614 case Maxwell::Blend::Factor::ConstantColor:
602 case Maxwell::Blend::Factor::ConstantColorGL: 615 case Maxwell::Blend::Factor::ConstantColorGL:
603 return vk::BlendFactor::eConstantColor; 616 return VK_BLEND_FACTOR_CONSTANT_COLOR;
604 case Maxwell::Blend::Factor::OneMinusConstantColor: 617 case Maxwell::Blend::Factor::OneMinusConstantColor:
605 case Maxwell::Blend::Factor::OneMinusConstantColorGL: 618 case Maxwell::Blend::Factor::OneMinusConstantColorGL:
606 return vk::BlendFactor::eOneMinusConstantColor; 619 return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR;
607 case Maxwell::Blend::Factor::ConstantAlpha: 620 case Maxwell::Blend::Factor::ConstantAlpha:
608 case Maxwell::Blend::Factor::ConstantAlphaGL: 621 case Maxwell::Blend::Factor::ConstantAlphaGL:
609 return vk::BlendFactor::eConstantAlpha; 622 return VK_BLEND_FACTOR_CONSTANT_ALPHA;
610 case Maxwell::Blend::Factor::OneMinusConstantAlpha: 623 case Maxwell::Blend::Factor::OneMinusConstantAlpha:
611 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: 624 case Maxwell::Blend::Factor::OneMinusConstantAlphaGL:
612 return vk::BlendFactor::eOneMinusConstantAlpha; 625 return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA;
613 } 626 }
614 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); 627 UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor));
615 return {}; 628 return {};
616} 629}
617 630
618vk::FrontFace FrontFace(Maxwell::FrontFace front_face) { 631VkFrontFace FrontFace(Maxwell::FrontFace front_face) {
619 switch (front_face) { 632 switch (front_face) {
620 case Maxwell::FrontFace::ClockWise: 633 case Maxwell::FrontFace::ClockWise:
621 return vk::FrontFace::eClockwise; 634 return VK_FRONT_FACE_CLOCKWISE;
622 case Maxwell::FrontFace::CounterClockWise: 635 case Maxwell::FrontFace::CounterClockWise:
623 return vk::FrontFace::eCounterClockwise; 636 return VK_FRONT_FACE_COUNTER_CLOCKWISE;
624 } 637 }
625 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); 638 UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face));
626 return {}; 639 return {};
627} 640}
628 641
629vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) { 642VkCullModeFlags CullFace(Maxwell::CullFace cull_face) {
630 switch (cull_face) { 643 switch (cull_face) {
631 case Maxwell::CullFace::Front: 644 case Maxwell::CullFace::Front:
632 return vk::CullModeFlagBits::eFront; 645 return VK_CULL_MODE_FRONT_BIT;
633 case Maxwell::CullFace::Back: 646 case Maxwell::CullFace::Back:
634 return vk::CullModeFlagBits::eBack; 647 return VK_CULL_MODE_BACK_BIT;
635 case Maxwell::CullFace::FrontAndBack: 648 case Maxwell::CullFace::FrontAndBack:
636 return vk::CullModeFlagBits::eFrontAndBack; 649 return VK_CULL_MODE_FRONT_AND_BACK;
637 } 650 }
638 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); 651 UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face));
639 return {}; 652 return {};
640} 653}
641 654
642vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { 655VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
643 switch (swizzle) { 656 switch (swizzle) {
644 case Tegra::Texture::SwizzleSource::Zero: 657 case Tegra::Texture::SwizzleSource::Zero:
645 return vk::ComponentSwizzle::eZero; 658 return VK_COMPONENT_SWIZZLE_ZERO;
646 case Tegra::Texture::SwizzleSource::R: 659 case Tegra::Texture::SwizzleSource::R:
647 return vk::ComponentSwizzle::eR; 660 return VK_COMPONENT_SWIZZLE_R;
648 case Tegra::Texture::SwizzleSource::G: 661 case Tegra::Texture::SwizzleSource::G:
649 return vk::ComponentSwizzle::eG; 662 return VK_COMPONENT_SWIZZLE_G;
650 case Tegra::Texture::SwizzleSource::B: 663 case Tegra::Texture::SwizzleSource::B:
651 return vk::ComponentSwizzle::eB; 664 return VK_COMPONENT_SWIZZLE_B;
652 case Tegra::Texture::SwizzleSource::A: 665 case Tegra::Texture::SwizzleSource::A:
653 return vk::ComponentSwizzle::eA; 666 return VK_COMPONENT_SWIZZLE_A;
654 case Tegra::Texture::SwizzleSource::OneInt: 667 case Tegra::Texture::SwizzleSource::OneInt:
655 case Tegra::Texture::SwizzleSource::OneFloat: 668 case Tegra::Texture::SwizzleSource::OneFloat:
656 return vk::ComponentSwizzle::eOne; 669 return VK_COMPONENT_SWIZZLE_ONE;
657 } 670 }
658 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); 671 UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle));
659 return {}; 672 return {};
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 24f6ab544..81bce4c6c 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -6,8 +6,8 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/vk_device.h" 9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/wrapper.h"
11#include "video_core/surface.h" 11#include "video_core/surface.h"
12#include "video_core/textures/texture.h" 12#include "video_core/textures/texture.h"
13 13
@@ -18,46 +18,45 @@ using PixelFormat = VideoCore::Surface::PixelFormat;
18 18
19namespace Sampler { 19namespace Sampler {
20 20
21vk::Filter Filter(Tegra::Texture::TextureFilter filter); 21VkFilter Filter(Tegra::Texture::TextureFilter filter);
22 22
23vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); 23VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);
24 24
25vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, 25VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode,
26 Tegra::Texture::TextureFilter filter); 26 Tegra::Texture::TextureFilter filter);
27 27
28vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); 28VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);
29 29
30} // namespace Sampler 30} // namespace Sampler
31 31
32struct FormatInfo { 32struct FormatInfo {
33 vk::Format format; 33 VkFormat format;
34 bool attachable; 34 bool attachable;
35 bool storage; 35 bool storage;
36}; 36};
37 37
38FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); 38FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format);
39 39
40vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); 40VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage);
41 41
42vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device, 42VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology);
43 Maxwell::PrimitiveTopology topology);
44 43
45vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); 44VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);
46 45
47vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison); 46VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison);
48 47
49vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); 48VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format);
50 49
51vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op); 50VkStencilOp StencilOp(Maxwell::StencilOp stencil_op);
52 51
53vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation); 52VkBlendOp BlendEquation(Maxwell::Blend::Equation equation);
54 53
55vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); 54VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor);
56 55
57vk::FrontFace FrontFace(Maxwell::FrontFace front_face); 56VkFrontFace FrontFace(Maxwell::FrontFace front_face);
58 57
59vk::CullModeFlags CullFace(Maxwell::CullFace cull_face); 58VkCullModeFlags CullFace(Maxwell::CullFace cull_face);
60 59
61vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); 60VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
62 61
63} // namespace Vulkan::MaxwellToVK 62} // namespace Vulkan::MaxwellToVK
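Since the header now traffics in plain C types, call sites can feed the helpers' return values straight into Vulkan C structs. A sketch under that assumption (MakeViewInfo is hypothetical; the literal swizzles stand in for SwizzleSource results, and the image/format arguments would come from the texture cache):

#include <vulkan/vulkan.h>

VkImageViewCreateInfo MakeViewInfo(VkImage image, VkFormat format) {
    VkImageViewCreateInfo ci{};
    ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
    ci.image = image;
    ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
    ci.format = format; // e.g. SurfaceFormat(device, format_type, pixel_format).format
    ci.components = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G,
                     VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A};
    ci.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
    return ci;
}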
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 6953aaafe..dd590c38b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -2,13 +2,18 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <cstring>
5#include <memory> 8#include <memory>
6#include <optional> 9#include <optional>
10#include <string>
7#include <vector> 11#include <vector>
8 12
9#include <fmt/format.h> 13#include <fmt/format.h>
10 14
11#include "common/assert.h" 15#include "common/assert.h"
16#include "common/dynamic_library.h"
12#include "common/logging/log.h" 17#include "common/logging/log.h"
13#include "common/telemetry.h" 18#include "common/telemetry.h"
14#include "core/core.h" 19#include "core/core.h"
@@ -19,7 +24,6 @@
19#include "core/settings.h" 24#include "core/settings.h"
20#include "core/telemetry_session.h" 25#include "core/telemetry_session.h"
21#include "video_core/gpu.h" 26#include "video_core/gpu.h"
22#include "video_core/renderer_vulkan/declarations.h"
23#include "video_core/renderer_vulkan/renderer_vulkan.h" 27#include "video_core/renderer_vulkan/renderer_vulkan.h"
24#include "video_core/renderer_vulkan/vk_blit_screen.h" 28#include "video_core/renderer_vulkan/vk_blit_screen.h"
25#include "video_core/renderer_vulkan/vk_device.h" 29#include "video_core/renderer_vulkan/vk_device.h"
@@ -29,30 +33,145 @@
29#include "video_core/renderer_vulkan/vk_scheduler.h" 33#include "video_core/renderer_vulkan/vk_scheduler.h"
30#include "video_core/renderer_vulkan/vk_state_tracker.h" 34#include "video_core/renderer_vulkan/vk_state_tracker.h"
31#include "video_core/renderer_vulkan/vk_swapchain.h" 35#include "video_core/renderer_vulkan/vk_swapchain.h"
36#include "video_core/renderer_vulkan/wrapper.h"
37
38// Include these late to avoid polluting previous headers
39#ifdef _WIN32
40#include <windows.h>
41// ensure include order
42#include <vulkan/vulkan_win32.h>
43#endif
44
45#ifdef __linux__
46#include <X11/Xlib.h>
47#include <vulkan/vulkan_wayland.h>
48#include <vulkan/vulkan_xlib.h>
49#endif
32 50
33namespace Vulkan { 51namespace Vulkan {
34 52
35namespace { 53namespace {
36 54
37VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, 55using Core::Frontend::WindowSystemType;
56
57VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
38 VkDebugUtilsMessageTypeFlagsEXT type, 58 VkDebugUtilsMessageTypeFlagsEXT type,
39 const VkDebugUtilsMessengerCallbackDataEXT* data, 59 const VkDebugUtilsMessengerCallbackDataEXT* data,
40 [[maybe_unused]] void* user_data) { 60 [[maybe_unused]] void* user_data) {
41 const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_};
42 const char* message{data->pMessage}; 61 const char* message{data->pMessage};
43 62
44 if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) { 63 if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
45 LOG_CRITICAL(Render_Vulkan, "{}", message); 64 LOG_CRITICAL(Render_Vulkan, "{}", message);
46 } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) { 65 } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) {
47 LOG_WARNING(Render_Vulkan, "{}", message); 66 LOG_WARNING(Render_Vulkan, "{}", message);
48 } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) { 67 } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) {
49 LOG_INFO(Render_Vulkan, "{}", message); 68 LOG_INFO(Render_Vulkan, "{}", message);
50 } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) { 69 } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) {
51 LOG_DEBUG(Render_Vulkan, "{}", message); 70 LOG_DEBUG(Render_Vulkan, "{}", message);
52 } 71 }
53 return VK_FALSE; 72 return VK_FALSE;
54} 73}
55 74
75Common::DynamicLibrary OpenVulkanLibrary() {
76 Common::DynamicLibrary library;
77#ifdef __APPLE__
78 // Check if a path to a specific Vulkan library has been specified.
79 char* libvulkan_env = getenv("LIBVULKAN_PATH");
80 if (!libvulkan_env || !library.Open(libvulkan_env)) {
81 // Use the libvulkan.dylib from the application bundle.
82 std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib";
83 library.Open(filename.c_str());
84 }
85#else
86 std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
87 if (!library.Open(filename.c_str())) {
88 // Android devices may not have libvulkan.so.1, only libvulkan.so.
89 filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
90 library.Open(filename.c_str());
91 }
92#endif
93 return library;
94}
95
96vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
97 WindowSystemType window_type = WindowSystemType::Headless,
98 bool enable_layers = false) {
99 if (!library.IsOpen()) {
100 LOG_ERROR(Render_Vulkan, "Vulkan library not available");
101 return {};
102 }
103 if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) {
104 LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan");
105 return {};
106 }
107 if (!vk::Load(dld)) {
108 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
109 return {};
110 }
111
112 std::vector<const char*> extensions;
113 extensions.reserve(6);
114 switch (window_type) {
115 case Core::Frontend::WindowSystemType::Headless:
116 break;
117#ifdef _WIN32
118 case Core::Frontend::WindowSystemType::Windows:
119 extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
120 break;
121#endif
122#ifdef __linux__
123 case Core::Frontend::WindowSystemType::X11:
124 extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
125 break;
126 case Core::Frontend::WindowSystemType::Wayland:
127 extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
128 break;
129#endif
130 default:
131 LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
132 break;
133 }
134 if (window_type != Core::Frontend::WindowSystemType::Headless) {
135 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
136 }
137 if (enable_layers) {
138 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
139 }
140 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
141
142 const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
143 if (!properties) {
144 LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
145 return {};
146 }
147
148 for (const char* extension : extensions) {
149 const auto it =
150 std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) {
151 return !std::strcmp(extension, prop.extensionName);
152 });
153 if (it == properties->end()) {
154 LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
155 return {};
156 }
157 }
158
159 static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"};
160 vk::Span<const char*> layers = layers_data;
161 if (!enable_layers) {
162 layers = {};
163 }
164 vk::Instance instance = vk::Instance::Create(layers, extensions, dld);
165 if (!instance) {
166 LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
167 return {};
168 }
169 if (!vk::Load(*instance, dld)) {
170 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers");
171 }
172 return instance;
173}
174
56std::string GetReadableVersion(u32 version) { 175std::string GetReadableVersion(u32 version) {
57 return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), 176 return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version),
58 VK_VERSION_PATCH(version)); 177 VK_VERSION_PATCH(version));
@@ -63,14 +182,14 @@ std::string GetDriverVersion(const VKDevice& device) {
63 // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 182 // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314
64 const u32 version = device.GetDriverVersion(); 183 const u32 version = device.GetDriverVersion();
65 184
66 if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { 185 if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) {
67 const u32 major = (version >> 22) & 0x3ff; 186 const u32 major = (version >> 22) & 0x3ff;
68 const u32 minor = (version >> 14) & 0x0ff; 187 const u32 minor = (version >> 14) & 0x0ff;
69 const u32 secondary = (version >> 6) & 0x0ff; 188 const u32 secondary = (version >> 6) & 0x0ff;
70 const u32 tertiary = version & 0x003f; 189 const u32 tertiary = version & 0x003f;
71 return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary); 190 return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary);
72 } 191 }
73 if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) { 192 if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
74 const u32 major = version >> 14; 193 const u32 major = version >> 14;
75 const u32 minor = version & 0x3fff; 194 const u32 minor = version & 0x3fff;
76 return fmt::format("{}.{}", major, minor); 195 return fmt::format("{}.{}", major, minor);
@@ -147,27 +266,12 @@ bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
147} 266}
148 267
149bool RendererVulkan::Init() { 268bool RendererVulkan::Init() {
150 PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; 269 library = OpenVulkanLibrary();
151 render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); 270 instance = CreateInstance(library, dld, render_window.GetWindowInfo().type,
152 const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr); 271 Settings::values.renderer_debug);
153 272 if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
154 std::optional<vk::DebugUtilsMessengerEXT> callback;
155 if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) {
156 callback = CreateDebugCallback(dldi);
157 if (!callback) {
158 return false;
159 }
160 }
161
162 if (!PickDevices(dldi)) {
163 if (callback) {
164 instance.destroy(*callback, nullptr, dldi);
165 }
166 return false; 273 return false;
167 } 274 }
168 debug_callback = UniqueDebugUtilsMessengerEXT(
169 *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>(
170 instance, nullptr, device->GetDispatchLoader()));
171 275
172 Report(); 276 Report();
173 277
@@ -176,7 +280,7 @@ bool RendererVulkan::Init() {
176 resource_manager = std::make_unique<VKResourceManager>(*device); 280 resource_manager = std::make_unique<VKResourceManager>(*device);
177 281
178 const auto& framebuffer = render_window.GetFramebufferLayout(); 282 const auto& framebuffer = render_window.GetFramebufferLayout();
179 swapchain = std::make_unique<VKSwapchain>(surface, *device); 283 swapchain = std::make_unique<VKSwapchain>(*surface, *device);
180 swapchain->Create(framebuffer.width, framebuffer.height, false); 284 swapchain->Create(framebuffer.width, framebuffer.height, false);
181 285
182 state_tracker = std::make_unique<StateTracker>(system); 286 state_tracker = std::make_unique<StateTracker>(system);
@@ -198,10 +302,8 @@ void RendererVulkan::ShutDown() {
198 if (!device) { 302 if (!device) {
199 return; 303 return;
200 } 304 }
201 const auto dev = device->GetLogical(); 305 if (const auto& dev = device->GetLogical()) {
202 const auto& dld = device->GetDispatchLoader(); 306 dev.WaitIdle();
203 if (dev && dld.vkDeviceWaitIdle) {
204 dev.waitIdle(dld);
205 } 307 }
206 308
207 rasterizer.reset(); 309 rasterizer.reset();
@@ -213,44 +315,94 @@ void RendererVulkan::ShutDown() {
213 device.reset(); 315 device.reset();
214} 316}
215 317
216std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( 318bool RendererVulkan::CreateDebugCallback() {
217 const vk::DispatchLoaderDynamic& dldi) { 319 if (!Settings::values.renderer_debug) {
218 const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( 320 return true;
219 {}, 321 }
220 vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | 322 debug_callback = instance.TryCreateDebugCallback(DebugCallback);
221 vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | 323 if (!debug_callback) {
222 vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo |
223 vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose,
224 vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral |
225 vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation |
226 vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance,
227 &DebugCallback, nullptr);
228 vk::DebugUtilsMessengerEXT callback;
229 if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) !=
230 vk::Result::eSuccess) {
231 LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); 324 LOG_ERROR(Render_Vulkan, "Failed to create debug callback");
232 return {}; 325 return false;
233 } 326 }
234 return callback; 327 return true;
235} 328}
236 329
237bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) { 330bool RendererVulkan::CreateSurface() {
238 const auto devices = instance.enumeratePhysicalDevices(dldi); 331 [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo();
332 VkSurfaceKHR unsafe_surface = nullptr;
333
334#ifdef _WIN32
335 if (window_info.type == Core::Frontend::WindowSystemType::Windows) {
336 const HWND hWnd = static_cast<HWND>(window_info.render_surface);
337 const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
338 nullptr, 0, nullptr, hWnd};
339 const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>(
340 dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR"));
341 if (!vkCreateWin32SurfaceKHR ||
342 vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
343 LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
344 return false;
345 }
346 }
347#endif
348#ifdef __linux__
349 if (window_info.type == Core::Frontend::WindowSystemType::X11) {
350 const VkXlibSurfaceCreateInfoKHR xlib_ci{
351 VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0,
352 static_cast<Display*>(window_info.display_connection),
353 reinterpret_cast<Window>(window_info.render_surface)};
354 const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>(
355 dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR"));
356 if (!vkCreateXlibSurfaceKHR ||
357 vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) {
358 LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
359 return false;
360 }
361 }
362 if (window_info.type == Core::Frontend::WindowSystemType::Wayland) {
363 const VkWaylandSurfaceCreateInfoKHR wayland_ci{
364 VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0,
365 static_cast<wl_display*>(window_info.display_connection),
366 static_cast<wl_surface*>(window_info.render_surface)};
367 const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>(
368 dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR"));
369 if (!vkCreateWaylandSurfaceKHR ||
370 vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) !=
371 VK_SUCCESS) {
372 LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
373 return false;
374 }
375 }
376#endif
377 if (!unsafe_surface) {
378 LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
379 return false;
380 }
381
382 surface = vk::SurfaceKHR(unsafe_surface, *instance, dld);
383 return true;
384}
385
386bool RendererVulkan::PickDevices() {
387 const auto devices = instance.EnumeratePhysicalDevices();
388 if (!devices) {
389 LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices");
390 return false;
391 }
239 392
240 // TODO(Rodrigo): Choose device from config file
241 const s32 device_index = Settings::values.vulkan_device; 393 const s32 device_index = Settings::values.vulkan_device;
242 if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { 394 if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) {
243 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); 395 LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index);
244 return false; 396 return false;
245 } 397 }
246 const vk::PhysicalDevice physical_device = devices[device_index]; 398 const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)],
247 399 dld);
248 if (!VKDevice::IsSuitable(dldi, physical_device, surface)) { 400 if (!VKDevice::IsSuitable(physical_device, *surface)) {
249 return false; 401 return false;
250 } 402 }
251 403
252 device = std::make_unique<VKDevice>(dldi, physical_device, surface); 404 device = std::make_unique<VKDevice>(*instance, physical_device, *surface, dld);
253 return device->Create(dldi, instance); 405 return device->Create();
254} 406}
255 407
256void RendererVulkan::Report() const { 408void RendererVulkan::Report() const {
@@ -276,4 +428,25 @@ void RendererVulkan::Report() const {
276 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); 428 telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
277} 429}
278 430
431std::vector<std::string> RendererVulkan::EnumerateDevices() {
432 vk::InstanceDispatch dld;
433 Common::DynamicLibrary library = OpenVulkanLibrary();
434 vk::Instance instance = CreateInstance(library, dld);
435 if (!instance) {
436 return {};
437 }
438
439 const std::optional physical_devices = instance.EnumeratePhysicalDevices();
440 if (!physical_devices) {
441 return {};
442 }
443
444 std::vector<std::string> names;
445 names.reserve(physical_devices->size());
446 for (const auto& device : *physical_devices) {
447 names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName);
448 }
449 return names;
450}
451
279} // namespace Vulkan 452} // namespace Vulkan
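CreateInstance verifies each requested extension through the runtime-loaded dispatch table because the loader itself is opened with Common::DynamicLibrary. For comparison, a standalone sketch of the same availability test, assuming the application links the Vulkan loader directly instead:

#include <cstdint>
#include <cstring>
#include <vector>
#include <vulkan/vulkan.h>

bool HasInstanceExtension(const char* name) {
    std::uint32_t count = 0;
    if (vkEnumerateInstanceExtensionProperties(nullptr, &count, nullptr) != VK_SUCCESS) {
        return false;
    }
    std::vector<VkExtensionProperties> properties(count);
    if (vkEnumerateInstanceExtensionProperties(nullptr, &count, properties.data()) !=
        VK_SUCCESS) {
        return false;
    }
    // Same strcmp scan CreateInstance performs over its required list.
    for (const VkExtensionProperties& prop : properties) {
        if (std::strcmp(name, prop.extensionName) == 0) {
            return true;
        }
    }
    return false;
}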
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index d14384e79..18270909b 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -6,10 +6,13 @@
6 6
7#include <memory> 7#include <memory>
8#include <optional> 8#include <optional>
9#include <string>
9#include <vector> 10#include <vector>
10 11
12#include "common/dynamic_library.h"
13
11#include "video_core/renderer_base.h" 14#include "video_core/renderer_base.h"
12#include "video_core/renderer_vulkan/declarations.h" 15#include "video_core/renderer_vulkan/wrapper.h"
13 16
14namespace Core { 17namespace Core {
15class System; 18class System;
@@ -44,22 +47,28 @@ public:
44 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; 47 void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
45 bool TryPresent(int timeout_ms) override; 48 bool TryPresent(int timeout_ms) override;
46 49
50 static std::vector<std::string> EnumerateDevices();
51
47private: 52private:
48 std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( 53 bool CreateDebugCallback();
49 const vk::DispatchLoaderDynamic& dldi);
50 54
51 bool PickDevices(const vk::DispatchLoaderDynamic& dldi); 55 bool CreateSurface();
56
57 bool PickDevices();
52 58
53 void Report() const; 59 void Report() const;
54 60
55 Core::System& system; 61 Core::System& system;
56 62
63 Common::DynamicLibrary library;
64 vk::InstanceDispatch dld;
65
57 vk::Instance instance; 66 vk::Instance instance;
58 vk::SurfaceKHR surface; 67 vk::SurfaceKHR surface;
59 68
60 VKScreenInfo screen_info; 69 VKScreenInfo screen_info;
61 70
62 UniqueDebugUtilsMessengerEXT debug_callback; 71 vk::DebugCallback debug_callback;
63 std::unique_ptr<VKDevice> device; 72 std::unique_ptr<VKDevice> device;
64 std::unique_ptr<VKSwapchain> swapchain; 73 std::unique_ptr<VKSwapchain> swapchain;
65 std::unique_ptr<VKMemoryManager> memory_manager; 74 std::unique_ptr<VKMemoryManager> memory_manager;
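Two details of the new header are easy to miss. The members are declared so that library and dld outlive every Vulkan handle: member destruction runs in reverse declaration order, so debug_callback, surface and instance are torn down while the loader is still mapped. And EnumerateDevices is static so frontends can list adapters before any renderer exists, using a short-lived headless instance. A hypothetical caller (the printing loop is the only assumption; the index matches what Settings::values.vulkan_device stores):

#include <cstddef>
#include <iostream>

#include "video_core/renderer_vulkan/renderer_vulkan.h"

int main() {
    const auto devices = Vulkan::RendererVulkan::EnumerateDevices();
    for (std::size_t i = 0; i < devices.size(); ++i) {
        std::cout << i << ": " << devices[i] << '\n';
    }
}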
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 855cfc883..fbd406f2b 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -20,7 +20,6 @@
20#include "video_core/gpu.h" 20#include "video_core/gpu.h"
21#include "video_core/morton.h" 21#include "video_core/morton.h"
22#include "video_core/rasterizer_interface.h" 22#include "video_core/rasterizer_interface.h"
23#include "video_core/renderer_vulkan/declarations.h"
24#include "video_core/renderer_vulkan/renderer_vulkan.h" 23#include "video_core/renderer_vulkan/renderer_vulkan.h"
25#include "video_core/renderer_vulkan/vk_blit_screen.h" 24#include "video_core/renderer_vulkan/vk_blit_screen.h"
26#include "video_core/renderer_vulkan/vk_device.h" 25#include "video_core/renderer_vulkan/vk_device.h"
@@ -30,6 +29,7 @@
30#include "video_core/renderer_vulkan/vk_scheduler.h" 29#include "video_core/renderer_vulkan/vk_scheduler.h"
31#include "video_core/renderer_vulkan/vk_shader_util.h" 30#include "video_core/renderer_vulkan/vk_shader_util.h"
32#include "video_core/renderer_vulkan/vk_swapchain.h" 31#include "video_core/renderer_vulkan/vk_swapchain.h"
32#include "video_core/renderer_vulkan/wrapper.h"
33#include "video_core/surface.h" 33#include "video_core/surface.h"
34 34
35namespace Vulkan { 35namespace Vulkan {
@@ -140,16 +140,25 @@ struct ScreenRectVertex {
140 std::array<f32, 2> position; 140 std::array<f32, 2> position;
141 std::array<f32, 2> tex_coord; 141 std::array<f32, 2> tex_coord;
142 142
143 static vk::VertexInputBindingDescription GetDescription() { 143 static VkVertexInputBindingDescription GetDescription() {
144 return vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertex), 144 VkVertexInputBindingDescription description;
145 vk::VertexInputRate::eVertex); 145 description.binding = 0;
146 description.stride = sizeof(ScreenRectVertex);
147 description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
148 return description;
146 } 149 }
147 150
148 static std::array<vk::VertexInputAttributeDescription, 2> GetAttributes() { 151 static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() {
149 return {vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat, 152 std::array<VkVertexInputAttributeDescription, 2> attributes;
150 offsetof(ScreenRectVertex, position)), 153 attributes[0].location = 0;
151 vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32Sfloat, 154 attributes[0].binding = 0;
152 offsetof(ScreenRectVertex, tex_coord))}; 155 attributes[0].format = VK_FORMAT_R32G32_SFLOAT;
156 attributes[0].offset = offsetof(ScreenRectVertex, position);
157 attributes[1].location = 1;
158 attributes[1].binding = 0;
159 attributes[1].format = VK_FORMAT_R32G32_SFLOAT;
160 attributes[1].offset = offsetof(ScreenRectVertex, tex_coord);
161 return attributes;
153 } 162 }
154}; 163};
155 164
@@ -172,16 +181,16 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
172 static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer); 181 static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer);
173} 182}
174 183
175vk::Format GetFormat(const Tegra::FramebufferConfig& framebuffer) { 184VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) {
176 switch (framebuffer.pixel_format) { 185 switch (framebuffer.pixel_format) {
177 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 186 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
178 return vk::Format::eA8B8G8R8UnormPack32; 187 return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
179 case Tegra::FramebufferConfig::PixelFormat::RGB565: 188 case Tegra::FramebufferConfig::PixelFormat::RGB565:
180 return vk::Format::eR5G6B5UnormPack16; 189 return VK_FORMAT_R5G6B5_UNORM_PACK16;
181 default: 190 default:
182 UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", 191 UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
183 static_cast<u32>(framebuffer.pixel_format)); 192 static_cast<u32>(framebuffer.pixel_format));
184 return vk::Format::eA8B8G8R8UnormPack32; 193 return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
185 } 194 }
186} 195}
187 196
@@ -219,8 +228,8 @@ void VKBlitScreen::Recreate() {
219 CreateDynamicResources(); 228 CreateDynamicResources();
220} 229}
221 230
222std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, 231std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
223 bool use_accelerated) { 232 bool use_accelerated) {
224 RefreshResources(framebuffer); 233 RefreshResources(framebuffer);
225 234
226 // Finish any pending renderpass 235 // Finish any pending renderpass
@@ -255,46 +264,76 @@ std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferC
255 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, 264 framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
256 map.GetAddress() + image_offset, host_ptr); 265 map.GetAddress() + image_offset, host_ptr);
257 266
258 blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eTransfer, 267 blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
259 vk::AccessFlagBits::eTransferWrite, 268 VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
260 vk::ImageLayout::eTransferDstOptimal); 269
261 270 VkBufferImageCopy copy;
262 const vk::BufferImageCopy copy(image_offset, 0, 0, 271 copy.bufferOffset = image_offset;
263 {vk::ImageAspectFlagBits::eColor, 0, 0, 1}, {0, 0, 0}, 272 copy.bufferRowLength = 0;
264 {framebuffer.width, framebuffer.height, 1}); 273 copy.bufferImageHeight = 0;
265 scheduler.Record([buffer_handle = *buffer, image = blit_image->GetHandle(), 274 copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
266 copy](auto cmdbuf, auto& dld) { 275 copy.imageSubresource.mipLevel = 0;
267 cmdbuf.copyBufferToImage(buffer_handle, image, vk::ImageLayout::eTransferDstOptimal, 276 copy.imageSubresource.baseArrayLayer = 0;
268 {copy}, dld); 277 copy.imageSubresource.layerCount = 1;
269 }); 278 copy.imageOffset.x = 0;
279 copy.imageOffset.y = 0;
280 copy.imageOffset.z = 0;
281 copy.imageExtent.width = framebuffer.width;
282 copy.imageExtent.height = framebuffer.height;
283 copy.imageExtent.depth = 1;
284 scheduler.Record(
285 [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) {
286 cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
287 });
270 } 288 }
271 map.Release(); 289 map.Release();
272 290
273 blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eFragmentShader, 291 blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
274 vk::AccessFlagBits::eShaderRead, 292 VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
275 vk::ImageLayout::eShaderReadOnlyOptimal);
276 293
277 scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], 294 scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
278 descriptor_set = descriptor_sets[image_index], buffer = *buffer, 295 descriptor_set = descriptor_sets[image_index], buffer = *buffer,
279 size = swapchain.GetSize(), pipeline = *pipeline, 296 size = swapchain.GetSize(), pipeline = *pipeline,
280 layout = *pipeline_layout](auto cmdbuf, auto& dld) { 297 layout = *pipeline_layout](vk::CommandBuffer cmdbuf) {
281 const vk::ClearValue clear_color{std::array{0.0f, 0.0f, 0.0f, 1.0f}}; 298 VkClearValue clear_color;
282 const vk::RenderPassBeginInfo renderpass_bi(renderpass, framebuffer, {{0, 0}, size}, 1, 299 clear_color.color.float32[0] = 0.0f;
283 &clear_color); 300 clear_color.color.float32[1] = 0.0f;
284 301 clear_color.color.float32[2] = 0.0f;
285 cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); 302 clear_color.color.float32[3] = 0.0f;
286 cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); 303
287 cmdbuf.setViewport( 304 VkRenderPassBeginInfo renderpass_bi;
288 0, 305 renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
289 {{0.0f, 0.0f, static_cast<f32>(size.width), static_cast<f32>(size.height), 0.0f, 1.0f}}, 306 renderpass_bi.pNext = nullptr;
290 dld); 307 renderpass_bi.renderPass = renderpass;
291 cmdbuf.setScissor(0, {{{0, 0}, size}}, dld); 308 renderpass_bi.framebuffer = framebuffer;
292 309 renderpass_bi.renderArea.offset.x = 0;
293 cmdbuf.bindVertexBuffers(0, {buffer}, {offsetof(BufferData, vertices)}, dld); 310 renderpass_bi.renderArea.offset.y = 0;
294 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, {descriptor_set}, {}, 311 renderpass_bi.renderArea.extent = size;
295 dld); 312 renderpass_bi.clearValueCount = 1;
296 cmdbuf.draw(4, 1, 0, 0, dld); 313 renderpass_bi.pClearValues = &clear_color;
297 cmdbuf.endRenderPass(dld); 314
315 VkViewport viewport;
316 viewport.x = 0.0f;
317 viewport.y = 0.0f;
318 viewport.width = static_cast<float>(size.width);
319 viewport.height = static_cast<float>(size.height);
320 viewport.minDepth = 0.0f;
321 viewport.maxDepth = 1.0f;
322
323 VkRect2D scissor;
324 scissor.offset.x = 0;
325 scissor.offset.y = 0;
326 scissor.extent = size;
327
328 cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
329 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
330 cmdbuf.SetViewport(0, viewport);
331 cmdbuf.SetScissor(0, scissor);
332
333 cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices));
334 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {});
335 cmdbuf.Draw(4, 1, 0, 0);
336 cmdbuf.EndRenderPass();
298 }); 337 });
299 338
300 return {scheduler.GetFence(), *semaphores[image_index]}; 339 return {scheduler.GetFence(), *semaphores[image_index]};
@@ -334,165 +373,297 @@ void VKBlitScreen::CreateShaders() {
 }

 void VKBlitScreen::CreateSemaphores() {
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-
     semaphores.resize(image_count);
-    for (std::size_t i = 0; i < image_count; ++i) {
-        semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld);
-    }
+    std::generate(semaphores.begin(), semaphores.end(),
+                  [this] { return device.GetLogical().CreateSemaphore(); });
 }

 void VKBlitScreen::CreateDescriptorPool() {
-    const std::array<vk::DescriptorPoolSize, 2> pool_sizes{
-        vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, static_cast<u32>(image_count)},
-        vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler,
-                               static_cast<u32>(image_count)}};
-    const vk::DescriptorPoolCreateInfo pool_ci(
-        {}, static_cast<u32>(image_count), static_cast<u32>(pool_sizes.size()), pool_sizes.data());
-    const auto dev = device.GetLogical();
-    descriptor_pool = dev.createDescriptorPoolUnique(pool_ci, nullptr, device.GetDispatchLoader());
+    std::array<VkDescriptorPoolSize, 2> pool_sizes;
+    pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+    pool_sizes[0].descriptorCount = static_cast<u32>(image_count);
+    pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+    pool_sizes[1].descriptorCount = static_cast<u32>(image_count);
+
+    VkDescriptorPoolCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
+    ci.maxSets = static_cast<u32>(image_count);
+    ci.poolSizeCount = static_cast<u32>(pool_sizes.size());
+    ci.pPoolSizes = pool_sizes.data();
+    descriptor_pool = device.GetLogical().CreateDescriptorPool(ci);
 }

 void VKBlitScreen::CreateRenderPass() {
-    const vk::AttachmentDescription color_attachment(
-        {}, swapchain.GetImageFormat(), vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eClear,
-        vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare,
-        vk::AttachmentStoreOp::eDontCare, vk::ImageLayout::eUndefined,
-        vk::ImageLayout::ePresentSrcKHR);
-
-    const vk::AttachmentReference color_attachment_ref(0, vk::ImageLayout::eColorAttachmentOptimal);
-
-    const vk::SubpassDescription subpass_description({}, vk::PipelineBindPoint::eGraphics, 0,
-                                                     nullptr, 1, &color_attachment_ref, nullptr,
-                                                     nullptr, 0, nullptr);
-
-    const vk::SubpassDependency dependency(
-        VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eColorAttachmentOutput,
-        vk::PipelineStageFlagBits::eColorAttachmentOutput, {},
-        vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite, {});
-
-    const vk::RenderPassCreateInfo renderpass_ci({}, 1, &color_attachment, 1, &subpass_description,
-                                                 1, &dependency);
-
-    const auto dev = device.GetLogical();
-    renderpass = dev.createRenderPassUnique(renderpass_ci, nullptr, device.GetDispatchLoader());
+    VkAttachmentDescription color_attachment;
+    color_attachment.flags = 0;
+    color_attachment.format = swapchain.GetImageFormat();
+    color_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
+    color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
+    color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+    color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
+    color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
+    color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+    color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
+
+    VkAttachmentReference color_attachment_ref;
+    color_attachment_ref.attachment = 0;
+    color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+
+    VkSubpassDescription subpass_description;
+    subpass_description.flags = 0;
+    subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
+    subpass_description.inputAttachmentCount = 0;
+    subpass_description.pInputAttachments = nullptr;
+    subpass_description.colorAttachmentCount = 1;
+    subpass_description.pColorAttachments = &color_attachment_ref;
+    subpass_description.pResolveAttachments = nullptr;
+    subpass_description.pDepthStencilAttachment = nullptr;
+    subpass_description.preserveAttachmentCount = 0;
+    subpass_description.pPreserveAttachments = nullptr;
+
+    VkSubpassDependency dependency;
+    dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
+    dependency.dstSubpass = 0;
+    dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    dependency.srcAccessMask = 0;
+    dependency.dstAccessMask =
+        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    dependency.dependencyFlags = 0;
+
+    VkRenderPassCreateInfo renderpass_ci;
+    renderpass_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
+    renderpass_ci.pNext = nullptr;
+    renderpass_ci.flags = 0;
+    renderpass_ci.attachmentCount = 1;
+    renderpass_ci.pAttachments = &color_attachment;
+    renderpass_ci.subpassCount = 1;
+    renderpass_ci.pSubpasses = &subpass_description;
+    renderpass_ci.dependencyCount = 1;
+    renderpass_ci.pDependencies = &dependency;
+
+    renderpass = device.GetLogical().CreateRenderPass(renderpass_ci);
 }

 void VKBlitScreen::CreateDescriptorSetLayout() {
-    const std::array<vk::DescriptorSetLayoutBinding, 2> layout_bindings{
-        vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eUniformBuffer, 1,
-                                       vk::ShaderStageFlagBits::eVertex, nullptr),
-        vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eCombinedImageSampler, 1,
-                                       vk::ShaderStageFlagBits::eFragment, nullptr)};
-    const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci(
-        {}, static_cast<u32>(layout_bindings.size()), layout_bindings.data());
-
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld);
+    std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings;
+    layout_bindings[0].binding = 0;
+    layout_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+    layout_bindings[0].descriptorCount = 1;
+    layout_bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
+    layout_bindings[0].pImmutableSamplers = nullptr;
+    layout_bindings[1].binding = 1;
+    layout_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+    layout_bindings[1].descriptorCount = 1;
+    layout_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
+    layout_bindings[1].pImmutableSamplers = nullptr;
+
+    VkDescriptorSetLayoutCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.bindingCount = static_cast<u32>(layout_bindings.size());
+    ci.pBindings = layout_bindings.data();
+
+    descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci);
 }

 void VKBlitScreen::CreateDescriptorSets() {
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-
-    descriptor_sets.resize(image_count);
-    for (std::size_t i = 0; i < image_count; ++i) {
-        const vk::DescriptorSetLayout layout = *descriptor_set_layout;
-        const vk::DescriptorSetAllocateInfo descriptor_set_ai(*descriptor_pool, 1, &layout);
-        const vk::Result result =
-            dev.allocateDescriptorSets(&descriptor_set_ai, &descriptor_sets[i], dld);
-        ASSERT(result == vk::Result::eSuccess);
-    }
+    const std::vector layouts(image_count, *descriptor_set_layout);
+
+    VkDescriptorSetAllocateInfo ai;
+    ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+    ai.pNext = nullptr;
+    ai.descriptorPool = *descriptor_pool;
+    ai.descriptorSetCount = static_cast<u32>(image_count);
+    ai.pSetLayouts = layouts.data();
+    descriptor_sets = descriptor_pool.Allocate(ai);
 }

 void VKBlitScreen::CreatePipelineLayout() {
-    const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &descriptor_set_layout.get(), 0,
-                                                          nullptr);
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    pipeline_layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
+    VkPipelineLayoutCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.setLayoutCount = 1;
+    ci.pSetLayouts = descriptor_set_layout.address();
+    ci.pushConstantRangeCount = 0;
+    ci.pPushConstantRanges = nullptr;
+    pipeline_layout = device.GetLogical().CreatePipelineLayout(ci);
 }

 void VKBlitScreen::CreateGraphicsPipeline() {
-    const std::array shader_stages = {
-        vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eVertex, *vertex_shader,
-                                          "main", nullptr),
-        vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eFragment, *fragment_shader,
-                                          "main", nullptr)};
+    std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages;
+    shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    shader_stages[0].pNext = nullptr;
+    shader_stages[0].flags = 0;
+    shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
+    shader_stages[0].module = *vertex_shader;
+    shader_stages[0].pName = "main";
+    shader_stages[0].pSpecializationInfo = nullptr;
+    shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    shader_stages[1].pNext = nullptr;
+    shader_stages[1].flags = 0;
+    shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
+    shader_stages[1].module = *fragment_shader;
+    shader_stages[1].pName = "main";
+    shader_stages[1].pSpecializationInfo = nullptr;

     const auto vertex_binding_description = ScreenRectVertex::GetDescription();
     const auto vertex_attrs_description = ScreenRectVertex::GetAttributes();
-    const vk::PipelineVertexInputStateCreateInfo vertex_input(
-        {}, 1, &vertex_binding_description, static_cast<u32>(vertex_attrs_description.size()),
-        vertex_attrs_description.data());
-
-    const vk::PipelineInputAssemblyStateCreateInfo input_assembly(
-        {}, vk::PrimitiveTopology::eTriangleStrip, false);
-
-    // Set a dummy viewport, it's going to be replaced by dynamic states.
-    const vk::Viewport viewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
-    const vk::Rect2D scissor({0, 0}, {1, 1});

-    const vk::PipelineViewportStateCreateInfo viewport_state({}, 1, &viewport, 1, &scissor);
-
-    const vk::PipelineRasterizationStateCreateInfo rasterizer(
-        {}, false, false, vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone,
-        vk::FrontFace::eClockwise, false, 0.0f, 0.0f, 0.0f, 1.0f);
-
-    const vk::PipelineMultisampleStateCreateInfo multisampling({}, vk::SampleCountFlagBits::e1,
-                                                               false, 0.0f, nullptr, false, false);
-
-    const vk::PipelineColorBlendAttachmentState color_blend_attachment(
-        false, vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd,
-        vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd,
-        vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
-            vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA);
-
-    const vk::PipelineColorBlendStateCreateInfo color_blending(
-        {}, false, vk::LogicOp::eCopy, 1, &color_blend_attachment, {0.0f, 0.0f, 0.0f, 0.0f});
-
-    const std::array<vk::DynamicState, 2> dynamic_states = {vk::DynamicState::eViewport,
-                                                            vk::DynamicState::eScissor};
-
-    const vk::PipelineDynamicStateCreateInfo dynamic_state(
-        {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
-
-    const vk::GraphicsPipelineCreateInfo pipeline_ci(
-        {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input,
-        &input_assembly, nullptr, &viewport_state, &rasterizer, &multisampling, nullptr,
-        &color_blending, &dynamic_state, *pipeline_layout, *renderpass, 0, nullptr, 0);
-
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    pipeline = dev.createGraphicsPipelineUnique({}, pipeline_ci, nullptr, dld);
+    VkPipelineVertexInputStateCreateInfo vertex_input_ci;
+    vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
+    vertex_input_ci.pNext = nullptr;
+    vertex_input_ci.flags = 0;
+    vertex_input_ci.vertexBindingDescriptionCount = 1;
+    vertex_input_ci.pVertexBindingDescriptions = &vertex_binding_description;
+    vertex_input_ci.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()};
+    vertex_input_ci.pVertexAttributeDescriptions = vertex_attrs_description.data();
+
+    VkPipelineInputAssemblyStateCreateInfo input_assembly_ci;
+    input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
+    input_assembly_ci.pNext = nullptr;
+    input_assembly_ci.flags = 0;
+    input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
+    input_assembly_ci.primitiveRestartEnable = VK_FALSE;
+
+    VkPipelineViewportStateCreateInfo viewport_state_ci;
+    viewport_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
+    viewport_state_ci.pNext = nullptr;
+    viewport_state_ci.flags = 0;
+    viewport_state_ci.viewportCount = 1;
+    viewport_state_ci.pViewports = nullptr;
+    viewport_state_ci.scissorCount = 1;
+    viewport_state_ci.pScissors = nullptr;
+
+    VkPipelineRasterizationStateCreateInfo rasterization_ci;
+    rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
+    rasterization_ci.pNext = nullptr;
+    rasterization_ci.flags = 0;
+    rasterization_ci.depthClampEnable = VK_FALSE;
+    rasterization_ci.rasterizerDiscardEnable = VK_FALSE;
+    rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
+    rasterization_ci.cullMode = VK_CULL_MODE_NONE;
+    rasterization_ci.frontFace = VK_FRONT_FACE_CLOCKWISE;
+    rasterization_ci.depthBiasEnable = VK_FALSE;
+    rasterization_ci.depthBiasConstantFactor = 0.0f;
+    rasterization_ci.depthBiasClamp = 0.0f;
+    rasterization_ci.depthBiasSlopeFactor = 0.0f;
+    rasterization_ci.lineWidth = 1.0f;
+
+    VkPipelineMultisampleStateCreateInfo multisampling_ci;
+    multisampling_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
+    multisampling_ci.pNext = nullptr;
+    multisampling_ci.flags = 0;
+    multisampling_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
+    multisampling_ci.sampleShadingEnable = VK_FALSE;
+    multisampling_ci.minSampleShading = 0.0f;
+    multisampling_ci.pSampleMask = nullptr;
+    multisampling_ci.alphaToCoverageEnable = VK_FALSE;
+    multisampling_ci.alphaToOneEnable = VK_FALSE;
+
+    VkPipelineColorBlendAttachmentState color_blend_attachment;
+    color_blend_attachment.blendEnable = VK_FALSE;
+    color_blend_attachment.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO;
+    color_blend_attachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO;
+    color_blend_attachment.colorBlendOp = VK_BLEND_OP_ADD;
+    color_blend_attachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
+    color_blend_attachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
+    color_blend_attachment.alphaBlendOp = VK_BLEND_OP_ADD;
+    color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+                                            VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
+
+    VkPipelineColorBlendStateCreateInfo color_blend_ci;
+    color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
+    color_blend_ci.flags = 0;
+    color_blend_ci.pNext = nullptr;
+    color_blend_ci.logicOpEnable = VK_FALSE;
+    color_blend_ci.logicOp = VK_LOGIC_OP_COPY;
+    color_blend_ci.attachmentCount = 1;
+    color_blend_ci.pAttachments = &color_blend_attachment;
+    color_blend_ci.blendConstants[0] = 0.0f;
+    color_blend_ci.blendConstants[1] = 0.0f;
+    color_blend_ci.blendConstants[2] = 0.0f;
+    color_blend_ci.blendConstants[3] = 0.0f;
+
+    static constexpr std::array dynamic_states = {VK_DYNAMIC_STATE_VIEWPORT,
+                                                  VK_DYNAMIC_STATE_SCISSOR};
+    VkPipelineDynamicStateCreateInfo dynamic_state_ci;
+    dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
+    dynamic_state_ci.pNext = nullptr;
+    dynamic_state_ci.flags = 0;
+    dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size());
+    dynamic_state_ci.pDynamicStates = dynamic_states.data();
+
+    VkGraphicsPipelineCreateInfo pipeline_ci;
+    pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+    pipeline_ci.pNext = nullptr;
+    pipeline_ci.flags = 0;
+    pipeline_ci.stageCount = static_cast<u32>(shader_stages.size());
+    pipeline_ci.pStages = shader_stages.data();
+    pipeline_ci.pVertexInputState = &vertex_input_ci;
+    pipeline_ci.pInputAssemblyState = &input_assembly_ci;
+    pipeline_ci.pTessellationState = nullptr;
+    pipeline_ci.pViewportState = &viewport_state_ci;
+    pipeline_ci.pRasterizationState = &rasterization_ci;
+    pipeline_ci.pMultisampleState = &multisampling_ci;
+    pipeline_ci.pDepthStencilState = nullptr;
+    pipeline_ci.pColorBlendState = &color_blend_ci;
+    pipeline_ci.pDynamicState = &dynamic_state_ci;
+    pipeline_ci.layout = *pipeline_layout;
+    pipeline_ci.renderPass = *renderpass;
+    pipeline_ci.subpass = 0;
+    pipeline_ci.basePipelineHandle = 0;
+    pipeline_ci.basePipelineIndex = 0;
+
+    pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci);
 }

 void VKBlitScreen::CreateSampler() {
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    const vk::SamplerCreateInfo sampler_ci(
-        {}, vk::Filter::eLinear, vk::Filter::eLinear, vk::SamplerMipmapMode::eLinear,
-        vk::SamplerAddressMode::eClampToBorder, vk::SamplerAddressMode::eClampToBorder,
-        vk::SamplerAddressMode::eClampToBorder, 0.0f, false, 0.0f, false, vk::CompareOp::eNever,
-        0.0f, 0.0f, vk::BorderColor::eFloatOpaqueBlack, false);
-    sampler = dev.createSamplerUnique(sampler_ci, nullptr, dld);
+    VkSamplerCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.magFilter = VK_FILTER_LINEAR;
+    ci.minFilter = VK_FILTER_NEAREST;
+    ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
+    ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+    ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+    ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+    ci.mipLodBias = 0.0f;
+    ci.anisotropyEnable = VK_FALSE;
+    ci.maxAnisotropy = 0.0f;
+    ci.compareEnable = VK_FALSE;
+    ci.compareOp = VK_COMPARE_OP_NEVER;
+    ci.minLod = 0.0f;
+    ci.maxLod = 0.0f;
+    ci.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
+    ci.unnormalizedCoordinates = VK_FALSE;
+
+    sampler = device.GetLogical().CreateSampler(ci);
 }

 void VKBlitScreen::CreateFramebuffers() {
-    const vk::Extent2D size{swapchain.GetSize()};
-    framebuffers.clear();
+    const VkExtent2D size{swapchain.GetSize()};
     framebuffers.resize(image_count);

-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
+    VkFramebufferCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.renderPass = *renderpass;
+    ci.attachmentCount = 1;
+    ci.width = size.width;
+    ci.height = size.height;
+    ci.layers = 1;

     for (std::size_t i = 0; i < image_count; ++i) {
-        const vk::ImageView image_view{swapchain.GetImageViewIndex(i)};
-        const vk::FramebufferCreateInfo framebuffer_ci({}, *renderpass, 1, &image_view, size.width,
-                                                       size.height, 1);
-        framebuffers[i] = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
+        const VkImageView image_view{swapchain.GetImageViewIndex(i)};
+        ci.pAttachments = &image_view;
+        framebuffers[i] = device.GetLogical().CreateFramebuffer(ci);
     }
 }

@@ -507,54 +678,86 @@ void VKBlitScreen::ReleaseRawImages() {
 }

 void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-
-    const vk::BufferCreateInfo buffer_ci({}, CalculateBufferSize(framebuffer),
-                                         vk::BufferUsageFlagBits::eTransferSrc |
-                                             vk::BufferUsageFlagBits::eVertexBuffer |
-                                             vk::BufferUsageFlagBits::eUniformBuffer,
-                                         vk::SharingMode::eExclusive, 0, nullptr);
-    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
-    buffer_commit = memory_manager.Commit(*buffer, true);
+    VkBufferCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.size = CalculateBufferSize(framebuffer);
+    ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+               VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+    ci.queueFamilyIndexCount = 0;
+    ci.pQueueFamilyIndices = nullptr;
+
+    buffer = device.GetLogical().CreateBuffer(ci);
+    buffer_commit = memory_manager.Commit(buffer, true);
 }

 void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
     raw_images.resize(image_count);
     raw_buffer_commits.resize(image_count);

-    const auto format = GetFormat(framebuffer);
+    VkImageCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.imageType = VK_IMAGE_TYPE_2D;
+    ci.format = GetFormat(framebuffer);
+    ci.extent.width = framebuffer.width;
+    ci.extent.height = framebuffer.height;
+    ci.extent.depth = 1;
+    ci.mipLevels = 1;
+    ci.arrayLayers = 1;
+    ci.samples = VK_SAMPLE_COUNT_1_BIT;
+    ci.tiling = VK_IMAGE_TILING_LINEAR;
+    ci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
+    ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+    ci.queueFamilyIndexCount = 0;
+    ci.pQueueFamilyIndices = nullptr;
+    ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+
     for (std::size_t i = 0; i < image_count; ++i) {
-        const vk::ImageCreateInfo image_ci(
-            {}, vk::ImageType::e2D, format, {framebuffer.width, framebuffer.height, 1}, 1, 1,
-            vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal,
-            vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled,
-            vk::SharingMode::eExclusive, 0, nullptr, vk::ImageLayout::eUndefined);
-
-        raw_images[i] =
-            std::make_unique<VKImage>(device, scheduler, image_ci, vk::ImageAspectFlagBits::eColor);
+        raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT);
         raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false);
     }
 }

-void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const {
-    const vk::DescriptorSet descriptor_set = descriptor_sets[image_index];
-
-    const vk::DescriptorBufferInfo buffer_info(*buffer, offsetof(BufferData, uniform),
-                                               sizeof(BufferData::uniform));
-    const vk::WriteDescriptorSet ubo_write(descriptor_set, 0, 0, 1,
-                                           vk::DescriptorType::eUniformBuffer, nullptr,
-                                           &buffer_info, nullptr);
-
-    const vk::DescriptorImageInfo image_info(*sampler, image_view,
-                                             vk::ImageLayout::eShaderReadOnlyOptimal);
-    const vk::WriteDescriptorSet sampler_write(descriptor_set, 1, 0, 1,
-                                               vk::DescriptorType::eCombinedImageSampler,
-                                               &image_info, nullptr, nullptr);
-
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    dev.updateDescriptorSets({ubo_write, sampler_write}, {}, dld);
+void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
+    VkDescriptorBufferInfo buffer_info;
+    buffer_info.buffer = *buffer;
+    buffer_info.offset = offsetof(BufferData, uniform);
+    buffer_info.range = sizeof(BufferData::uniform);
+
+    VkWriteDescriptorSet ubo_write;
+    ubo_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    ubo_write.pNext = nullptr;
+    ubo_write.dstSet = descriptor_sets[image_index];
+    ubo_write.dstBinding = 0;
+    ubo_write.dstArrayElement = 0;
+    ubo_write.descriptorCount = 1;
+    ubo_write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+    ubo_write.pImageInfo = nullptr;
+    ubo_write.pBufferInfo = &buffer_info;
+    ubo_write.pTexelBufferView = nullptr;
+
+    VkDescriptorImageInfo image_info;
+    image_info.sampler = *sampler;
+    image_info.imageView = image_view;
+    image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+    VkWriteDescriptorSet sampler_write;
+    sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+    sampler_write.pNext = nullptr;
+    sampler_write.dstSet = descriptor_sets[image_index];
+    sampler_write.dstBinding = 1;
+    sampler_write.dstArrayElement = 0;
+    sampler_write.descriptorCount = 1;
+    sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+    sampler_write.pImageInfo = &image_info;
+    sampler_write.pBufferInfo = nullptr;
+    sampler_write.pTexelBufferView = nullptr;

+    device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
 }

 void VKBlitScreen::SetUniformData(BufferData& data,
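A pattern worth noting in the rewritten file: every VkXxxCreateInfo is declared without an initializer and then assigned field by field, so each member, including sType and pNext, must be set before the call or the struct holds garbage. A small sketch of this style next to the common alternative, using only the plain Vulkan C API (nothing here is project code):

#include <vulkan/vulkan.h>

// Style used by this diff: no initializer, so every member needs an
// explicit assignment before the struct may be handed to the driver.
VkSemaphoreCreateInfo MakeSemaphoreInfoExplicit() {
    VkSemaphoreCreateInfo ci;
    ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
    ci.pNext = nullptr; // forgetting this leaves an indeterminate pointer
    ci.flags = 0;
    return ci;
}

// Alternative style: value-initialize first, then override the non-zero
// fields; pNext and flags start out as nullptr/0 automatically.
VkSemaphoreCreateInfo MakeSemaphoreInfoZeroed() {
    VkSemaphoreCreateInfo ci{};
    ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
    return ci;
}

The explicit style trades a few extra lines for the guarantee that no field was silently left at a stale default, which is easy to audit in review.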
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index ea680b3f5..5eb544aea 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -8,9 +8,9 @@
 #include <memory>
 #include <tuple>

-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/wrapper.h"

 namespace Core {
 class System;
@@ -49,8 +49,8 @@ public:

     void Recreate();

-    std::tuple<VKFence&, vk::Semaphore> Draw(const Tegra::FramebufferConfig& framebuffer,
-                                             bool use_accelerated);
+    std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer,
+                                           bool use_accelerated);

 private:
     struct BufferData;
@@ -74,7 +74,7 @@ private:
     void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer);
     void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);

-    void UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const;
+    void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
     void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
     void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;

@@ -93,23 +93,23 @@ private:
     const std::size_t image_count;
     const VKScreenInfo& screen_info;

-    UniqueShaderModule vertex_shader;
-    UniqueShaderModule fragment_shader;
-    UniqueDescriptorPool descriptor_pool;
-    UniqueDescriptorSetLayout descriptor_set_layout;
-    UniquePipelineLayout pipeline_layout;
-    UniquePipeline pipeline;
-    UniqueRenderPass renderpass;
-    std::vector<UniqueFramebuffer> framebuffers;
-    std::vector<vk::DescriptorSet> descriptor_sets;
-    UniqueSampler sampler;
+    vk::ShaderModule vertex_shader;
+    vk::ShaderModule fragment_shader;
+    vk::DescriptorPool descriptor_pool;
+    vk::DescriptorSetLayout descriptor_set_layout;
+    vk::PipelineLayout pipeline_layout;
+    vk::Pipeline pipeline;
+    vk::RenderPass renderpass;
+    std::vector<vk::Framebuffer> framebuffers;
+    vk::DescriptorSets descriptor_sets;
+    vk::Sampler sampler;

-    UniqueBuffer buffer;
+    vk::Buffer buffer;
     VKMemoryCommit buffer_commit;

     std::vector<std::unique_ptr<VKFenceWatch>> watches;

-    std::vector<UniqueSemaphore> semaphores;
+    std::vector<vk::Semaphore> semaphores;
     std::vector<std::unique_ptr<VKImage>> raw_images;
     std::vector<VKMemoryCommit> raw_buffer_commits;
     u32 raw_width = 0;
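The member declarations above trade vulkan.hpp's Unique* owners for the project's own vk:: wrappers; from the usage in the .cpp (dereference for the raw handle, .address() for a pointer to it) they behave like move-only RAII owners. A reduced sketch of such a wrapper, under the assumption that each handle is paired with its destroy function; the real wrapper.h is more elaborate:

#include <utility>
#include <vulkan/vulkan.h>

// Move-only owner of a device-level Vulkan handle. The destroy function is
// injected so one template serves semaphores, samplers, render passes, ...
template <typename T>
class Handle {
    using Destroyer = void (VKAPI_PTR*)(VkDevice, T, const VkAllocationCallbacks*);

public:
    Handle() = default;
    Handle(T handle_, VkDevice owner_, Destroyer destroy_)
        : handle{handle_}, owner{owner_}, destroy{destroy_} {}
    ~Handle() {
        Release();
    }

    Handle(const Handle&) = delete;
    Handle(Handle&& rhs) noexcept
        : handle{std::exchange(rhs.handle, T{})}, owner{rhs.owner}, destroy{rhs.destroy} {}
    Handle& operator=(Handle&& rhs) noexcept {
        Release();
        handle = std::exchange(rhs.handle, T{});
        owner = rhs.owner;
        destroy = rhs.destroy;
        return *this;
    }

    T operator*() const {
        return handle; // matches the *semaphores[i] usage in the diff
    }
    const T* address() const {
        return &handle; // matches descriptor_set_layout.address()
    }

private:
    void Release() {
        if (handle != T{}) {
            destroy(owner, handle, nullptr);
        }
    }

    T handle{};
    VkDevice owner = nullptr;
    Destroyer destroy = nullptr;
};

// Example instantiation (hypothetical usage):
// Handle<VkSemaphore> semaphore{raw_semaphore, device, vkDestroySemaphore};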
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1ba544943..0d167afbd 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -11,48 +11,50 @@
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/bit_util.h" 12#include "common/bit_util.h"
13#include "core/core.h" 13#include "core/core.h"
14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_buffer_cache.h" 14#include "video_core/renderer_vulkan/vk_buffer_cache.h"
16#include "video_core/renderer_vulkan/vk_device.h" 15#include "video_core/renderer_vulkan/vk_device.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h" 16#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_stream_buffer.h" 17#include "video_core/renderer_vulkan/vk_stream_buffer.h"
18#include "video_core/renderer_vulkan/wrapper.h"
19 19
20namespace Vulkan { 20namespace Vulkan {
21 21
22namespace { 22namespace {
23 23
24const auto BufferUsage = 24constexpr VkBufferUsageFlags BUFFER_USAGE =
25 vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | 25 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
26 vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; 26 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
27 27
28const auto UploadPipelineStage = 28constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE =
29 vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | 29 VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
30 vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | 30 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
31 vk::PipelineStageFlagBits::eComputeShader; 31 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
32 32
33const auto UploadAccessBarriers = 33constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
34 vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | 34 VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
35 vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | 35 VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
36 vk::AccessFlagBits::eIndexRead;
37 36
38auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { 37std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
39 return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage); 38 return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE);
40} 39}
41 40
42} // Anonymous namespace 41} // Anonymous namespace
43 42
44CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, 43CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
45 CacheAddr cache_addr, std::size_t size) 44 VAddr cpu_addr, std::size_t size)
46 : VideoCommon::BufferBlock{cache_addr, size} { 45 : VideoCommon::BufferBlock{cpu_addr, size} {
47 const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), 46 VkBufferCreateInfo ci;
48 BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | 47 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
49 vk::BufferUsageFlagBits::eTransferDst, 48 ci.pNext = nullptr;
50 vk::SharingMode::eExclusive, 0, nullptr); 49 ci.flags = 0;
51 50 ci.size = static_cast<VkDeviceSize>(size);
52 const auto& dld{device.GetDispatchLoader()}; 51 ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
53 const auto dev{device.GetLogical()}; 52 ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
54 buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); 53 ci.queueFamilyIndexCount = 0;
55 buffer.commit = memory_manager.Commit(*buffer.handle, false); 54 ci.pQueueFamilyIndices = nullptr;
55
56 buffer.handle = device.GetLogical().CreateBuffer(ci);
57 buffer.commit = memory_manager.Commit(buffer.handle, false);
56} 58}
57 59
58CachedBufferBlock::~CachedBufferBlock() = default; 60CachedBufferBlock::~CachedBufferBlock() = default;
@@ -60,30 +62,30 @@ CachedBufferBlock::~CachedBufferBlock() = default;
 VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                              const VKDevice& device, VKMemoryManager& memory_manager,
                              VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
-    : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system,
-                                                                   CreateStreamBuffer(device,
-                                                                                      scheduler)},
+    : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
+                                                                 CreateStreamBuffer(device,
+                                                                                    scheduler)},
       device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
                                                                                 staging_pool} {}

 VKBufferCache::~VKBufferCache() = default;

-Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
-    return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size);
+Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
+    return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
 }

-const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) {
+const VkBuffer* VKBufferCache::ToHandle(const Buffer& buffer) {
     return buffer->GetHandle();
 }

-const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
+const VkBuffer* VKBufferCache::GetEmptyBuffer(std::size_t size) {
     size = std::max(size, std::size_t(4));
     const auto& empty = staging_pool.GetUnusedBuffer(size, false);
     scheduler.RequestOutsideRenderPassOperationContext();
-    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) {
-        cmdbuf.fillBuffer(buffer, 0, size, 0, dld);
+    scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
+        cmdbuf.FillBuffer(buffer, 0, size, 0);
     });
-    return &*empty.handle;
+    return empty.handle.address();
 }

 void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
@@ -93,14 +95,21 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st

     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
-                      size](auto cmdbuf, auto& dld) {
-        cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld);
-        cmdbuf.pipelineBarrier(
-            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
-            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
-                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer,
-                                     offset, size)},
-            {}, dld);
+                      size](vk::CommandBuffer cmdbuf) {
+        cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
+
+        VkBufferMemoryBarrier barrier;
+        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+        barrier.pNext = nullptr;
+        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+        barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.buffer = buffer;
+        barrier.offset = offset;
+        barrier.size = size;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
+                               barrier, {});
     });
 }

@@ -109,16 +118,23 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
     const auto& staging = staging_pool.GetUnusedBuffer(size, true);
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset,
-                      size](auto cmdbuf, auto& dld) {
-        cmdbuf.pipelineBarrier(
-            vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader |
-                vk::PipelineStageFlagBits::eComputeShader,
-            vk::PipelineStageFlagBits::eTransfer, {}, {},
-            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite,
-                                     vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED,
-                                     VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)},
-            {}, dld);
-        cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld);
+                      size](vk::CommandBuffer cmdbuf) {
+        VkBufferMemoryBarrier barrier;
+        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+        barrier.pNext = nullptr;
+        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+        barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.buffer = buffer;
+        barrier.offset = offset;
+        barrier.size = size;
+
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                                   VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                                   VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {});
+        cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size});
     });
     scheduler.Finish();

@@ -129,17 +145,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
                            std::size_t dst_offset, std::size_t size) {
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset,
-                      dst_offset, size](auto cmdbuf, auto& dld) {
-        cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld);
-        cmdbuf.pipelineBarrier(
-            vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {},
-            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead,
-                                     vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED,
-                                     VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size),
-             vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers,
-                                     VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer,
-                                     dst_offset, size)},
-            {}, dld);
+                      dst_offset, size](vk::CommandBuffer cmdbuf) {
+        cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});
+
+        std::array<VkBufferMemoryBarrier, 2> barriers;
+        barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+        barriers[0].pNext = nullptr;
+        barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+        barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+        barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barriers[0].buffer = src_buffer;
+        barriers[0].offset = src_offset;
+        barriers[0].size = size;
+        barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+        barriers[1].pNext = nullptr;
+        barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+        barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS;
+        barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barriers[1].buffer = dst_buffer;
+        barriers[1].offset = dst_offset;
+        barriers[1].size = size;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
+                               barriers, {});
     });
 }

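UploadBlockData, DownloadBlockData and CopyBlock above all follow the same recipe: record the transfer, then a VkBufferMemoryBarrier so the consuming pipeline stages observe the write. The upload half of that pattern in isolation, written against the plain C API rather than the project's wrapper (command-buffer and queue setup omitted; the masks mirror the UPLOAD_* constants above):

#include <vulkan/vulkan.h>

// Copy a staging buffer into a device buffer, then make the transfer write
// visible to every stage that may consume the data afterwards.
void RecordUpload(VkCommandBuffer cmdbuf, VkBuffer staging, VkBuffer dest,
                  VkDeviceSize offset, VkDeviceSize size) {
    const VkBufferCopy copy{0, offset, size};
    vkCmdCopyBuffer(cmdbuf, staging, dest, 1, &copy);

    VkBufferMemoryBarrier barrier{};
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT |
                            VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
                            VK_ACCESS_INDEX_READ_BIT;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.buffer = dest;
    barrier.offset = offset;
    barrier.size = size;

    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                             VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                         0, 0, nullptr, 1, &barrier, 0, nullptr);
}

Restricting the barrier to the exact buffer range keeps the synchronization scope as narrow as the copy itself.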
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 3f38eed0c..d3c23da98 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -11,11 +11,11 @@
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/buffer_cache/buffer_cache.h" 12#include "video_core/buffer_cache/buffer_cache.h"
13#include "video_core/rasterizer_cache.h" 13#include "video_core/rasterizer_cache.h"
14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_memory_manager.h" 14#include "video_core/renderer_vulkan/vk_memory_manager.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h" 15#include "video_core/renderer_vulkan/vk_resource_manager.h"
17#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 16#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
18#include "video_core/renderer_vulkan/vk_stream_buffer.h" 17#include "video_core/renderer_vulkan/vk_stream_buffer.h"
18#include "video_core/renderer_vulkan/wrapper.h"
19 19
20namespace Core { 20namespace Core {
21class System; 21class System;
@@ -30,11 +30,11 @@ class VKScheduler;
 class CachedBufferBlock final : public VideoCommon::BufferBlock {
 public:
     explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager,
-                               CacheAddr cache_addr, std::size_t size);
+                               VAddr cpu_addr, std::size_t size);
     ~CachedBufferBlock();

-    const vk::Buffer* GetHandle() const {
-        return &*buffer.handle;
+    const VkBuffer* GetHandle() const {
+        return buffer.handle.address();
     }

 private:
@@ -43,21 +43,21 @@ private:

 using Buffer = std::shared_ptr<CachedBufferBlock>;

-class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> {
+class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
 public:
     explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
                            const VKDevice& device, VKMemoryManager& memory_manager,
                            VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
     ~VKBufferCache();

-    const vk::Buffer* GetEmptyBuffer(std::size_t size) override;
+    const VkBuffer* GetEmptyBuffer(std::size_t size) override;

 protected:
     void WriteBarrier() override {}

-    Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+    Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;

-    const vk::Buffer* ToHandle(const Buffer& buffer) override;
+    const VkBuffer* ToHandle(const Buffer& buffer) override;

     void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
                          const u8* data) override;
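The switch from vk::Buffer to VkBuffer in the template arguments works because the shared buffer cache only needs an opaque handle type and a pointer to it; ownership stays in the backend's block type. A trimmed sketch of that split, with placeholder names where the real VideoCommon interface is not shown in this diff:

#include <cstddef>
#include <cstdint>

using VAddr = std::uint64_t; // guest CPU address, as in common_types.h

// What the shared cache needs from a backend: create a block covering a CPU
// range, and expose a pointer to the raw API handle for binding.
template <typename OwnerBuffer, typename BufferType>
class GenericBufferCache {
public:
    virtual ~GenericBufferCache() = default;

protected:
    virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
    virtual const BufferType* ToHandle(const OwnerBuffer& block) = 0;
};

Keyed on VAddr rather than the old CacheAddr, blocks now track guest CPU ranges directly, which is the other half of this commit's interface change.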
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 7bdda3d79..9d92305f4 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -10,13 +10,13 @@
10#include "common/alignment.h" 10#include "common/alignment.h"
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/renderer_vulkan/declarations.h"
14#include "video_core/renderer_vulkan/vk_compute_pass.h" 13#include "video_core/renderer_vulkan/vk_compute_pass.h"
15#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 14#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
16#include "video_core/renderer_vulkan/vk_device.h" 15#include "video_core/renderer_vulkan/vk_device.h"
17#include "video_core/renderer_vulkan/vk_scheduler.h" 16#include "video_core/renderer_vulkan/vk_scheduler.h"
18#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 17#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
19#include "video_core/renderer_vulkan/vk_update_descriptor.h" 18#include "video_core/renderer_vulkan/vk_update_descriptor.h"
19#include "video_core/renderer_vulkan/wrapper.h"
20 20
21namespace Vulkan { 21namespace Vulkan {
22 22
@@ -114,6 +114,35 @@ constexpr u8 quad_array[] = {
     0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
     0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};

+VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
+    VkDescriptorSetLayoutBinding binding;
+    binding.binding = 0;
+    binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    binding.descriptorCount = 1;
+    binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    binding.pImmutableSamplers = nullptr;
+    return binding;
+}
+
+VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() {
+    VkDescriptorUpdateTemplateEntryKHR entry;
+    entry.dstBinding = 0;
+    entry.dstArrayElement = 0;
+    entry.descriptorCount = 1;
+    entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    entry.offset = 0;
+    entry.stride = sizeof(DescriptorUpdateEntry);
+    return entry;
+}
+
+VkPushConstantRange BuildQuadArrayPassPushConstantRange() {
+    VkPushConstantRange range;
+    range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    range.offset = 0;
+    range.size = sizeof(u32);
+    return range;
+}
+
 // Uint8 SPIR-V module. Generated from the "shaders/" directory.
 constexpr u8 uint8_pass[] = {
     0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
@@ -191,53 +220,111 @@ constexpr u8 uint8_pass[] = {
     0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
     0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};

+std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() {
+    std::array<VkDescriptorSetLayoutBinding, 2> bindings;
+    bindings[0].binding = 0;
+    bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    bindings[0].descriptorCount = 1;
+    bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    bindings[0].pImmutableSamplers = nullptr;
+    bindings[1].binding = 1;
+    bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    bindings[1].descriptorCount = 1;
+    bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+    bindings[1].pImmutableSamplers = nullptr;
+    return bindings;
+}
+
+VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() {
+    VkDescriptorUpdateTemplateEntryKHR entry;
+    entry.dstBinding = 0;
+    entry.dstArrayElement = 0;
+    entry.descriptorCount = 2;
+    entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+    entry.offset = 0;
+    entry.stride = sizeof(DescriptorUpdateEntry);
+    return entry;
+}
+
 } // Anonymous namespace

 VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
-                             const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
-                             const std::vector<vk::DescriptorUpdateTemplateEntry>& templates,
-                             const std::vector<vk::PushConstantRange> push_constants,
-                             std::size_t code_size, const u8* code) {
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-
-    const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci(
-        {}, static_cast<u32>(bindings.size()), bindings.data());
-    descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld);
-
-    const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout,
-                                                          static_cast<u32>(push_constants.size()),
-                                                          push_constants.data());
-    layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
+                             vk::Span<VkDescriptorSetLayoutBinding> bindings,
+                             vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
+                             vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
+                             const u8* code) {
+    VkDescriptorSetLayoutCreateInfo descriptor_layout_ci;
+    descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+    descriptor_layout_ci.pNext = nullptr;
+    descriptor_layout_ci.flags = 0;
+    descriptor_layout_ci.bindingCount = bindings.size();
+    descriptor_layout_ci.pBindings = bindings.data();
+    descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci);
+
+    VkPipelineLayoutCreateInfo pipeline_layout_ci;
+    pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+    pipeline_layout_ci.pNext = nullptr;
+    pipeline_layout_ci.flags = 0;
+    pipeline_layout_ci.setLayoutCount = 1;
+    pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address();
+    pipeline_layout_ci.pushConstantRangeCount = push_constants.size();
+    pipeline_layout_ci.pPushConstantRanges = push_constants.data();
+    layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci);

     if (!templates.empty()) {
-        const vk::DescriptorUpdateTemplateCreateInfo template_ci(
-            {}, static_cast<u32>(templates.size()), templates.data(),
-            vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
-            vk::PipelineBindPoint::eGraphics, *layout, 0);
-        descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
+        VkDescriptorUpdateTemplateCreateInfoKHR template_ci;
+        template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
+        template_ci.pNext = nullptr;
+        template_ci.flags = 0;
+        template_ci.descriptorUpdateEntryCount = templates.size();
+        template_ci.pDescriptorUpdateEntries = templates.data();
+        template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
+        template_ci.descriptorSetLayout = *descriptor_set_layout;
+        template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
+        template_ci.pipelineLayout = *layout;
+        template_ci.set = 0;
+        descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci);

         descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
     }

     auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
     std::memcpy(code_copy.get(), code, code_size);
-    const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get());
-    module = dev.createShaderModuleUnique(module_ci, nullptr, dld);

-    const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module,
-                                                     "main", nullptr);
+    VkShaderModuleCreateInfo module_ci;
+    module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    module_ci.pNext = nullptr;
+    module_ci.flags = 0;
+    module_ci.codeSize = code_size;
+    module_ci.pCode = code_copy.get();
+    module = device.GetLogical().CreateShaderModule(module_ci);
+
+    VkComputePipelineCreateInfo pipeline_ci;
+    pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
+    pipeline_ci.pNext = nullptr;
+    pipeline_ci.flags = 0;
+    pipeline_ci.layout = *layout;
+    pipeline_ci.basePipelineHandle = nullptr;
+    pipeline_ci.basePipelineIndex = 0;

-    const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0);
-    pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld);
+    VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage;
+    stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    stage_ci.pNext = nullptr;
+    stage_ci.flags = 0;
+    stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
+    stage_ci.module = *module;
+    stage_ci.pName = "main";
+    stage_ci.pSpecializationInfo = nullptr;
+
+    pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci);
 }

 VKComputePass::~VKComputePass() = default;

-vk::DescriptorSet VKComputePass::CommitDescriptorSet(
-    VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) {
+VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
+                                                   VKFence& fence) {
     if (!descriptor_template) {
-        return {};
+        return nullptr;
     }
     const auto set = descriptor_allocator->Commit(fence);
     update_descriptor_queue.Send(*descriptor_template, set);
@@ -248,25 +335,21 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
                              VKDescriptorPool& descriptor_pool,
                              VKStagingBufferPool& staging_buffer_pool,
                              VKUpdateDescriptorQueue& update_descriptor_queue)
-    : VKComputePass(device, descriptor_pool,
-                    {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
-                                                    vk::ShaderStageFlagBits::eCompute, nullptr)},
-                    {vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer,
-                                                       0, sizeof(DescriptorUpdateEntry))},
-                    {vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))},
-                    std::size(quad_array), quad_array),
+    : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(),
+                    BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
+                    BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array),
       scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
       update_descriptor_queue{update_descriptor_queue} {}

 QuadArrayPass::~QuadArrayPass() = default;

-std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
+std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
     const u32 num_triangle_vertices = num_vertices * 6 / 4;
     const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
     auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);

     update_descriptor_queue.Acquire();
-    update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
+    update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size);
     const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());

     scheduler.RequestOutsideRenderPassOperationContext();
@@ -274,66 +357,72 @@ std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_ver
     ASSERT(num_vertices % 4 == 0);
     const u32 num_quads = num_vertices / 4;
     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads,
-                      first, set](auto cmdbuf, auto& dld) {
+                      first, set](vk::CommandBuffer cmdbuf) {
         constexpr u32 dispatch_size = 1024;
-        cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld);
-        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld);
-        cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first,
-                             dld);
-        cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld);
-
-        const vk::BufferMemoryBarrier barrier(
-            vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead,
-            VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0,
-            static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32));
-        cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
-                               vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld);
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
+        cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first);
+        cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1);
+
+        VkBufferMemoryBarrier barrier;
+        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+        barrier.pNext = nullptr;
+        barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
+        barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.buffer = buffer;
+        barrier.offset = 0;
+        barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
     });
-    return {*buffer.handle, 0};
+    return {buffer.handle.address(), 0};
 }

 Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
                      VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool,
                      VKUpdateDescriptorQueue& update_descriptor_queue)
-    : VKComputePass(device, descriptor_pool,
-                    {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1,
-                                                    vk::ShaderStageFlagBits::eCompute, nullptr),
-                     vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1,
-                                                    vk::ShaderStageFlagBits::eCompute, nullptr)},
-                    {vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer,
-                                                       0, sizeof(DescriptorUpdateEntry))},
-                    {}, std::size(uint8_pass), uint8_pass),
+    : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(),
+                    BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass),
+                    uint8_pass),
       scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool},
       update_descriptor_queue{update_descriptor_queue} {}

 Uint8Pass::~Uint8Pass() = default;

-std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer,
-                                                      u64 src_offset) {
+std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
+                                                    u64 src_offset) {
     const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
     auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);

     update_descriptor_queue.Acquire();
     update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices);
-    update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size);
+    update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size);
319 const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); 402 const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
320 403
321 scheduler.RequestOutsideRenderPassOperationContext(); 404 scheduler.RequestOutsideRenderPassOperationContext();
322 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, 405 scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
323 num_vertices](auto cmdbuf, auto& dld) { 406 num_vertices](vk::CommandBuffer cmdbuf) {
324 constexpr u32 dispatch_size = 1024; 407 constexpr u32 dispatch_size = 1024;
325 cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); 408 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
326 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); 409 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {});
327 cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld); 410 cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1);
328 411
329 const vk::BufferMemoryBarrier barrier( 412 VkBufferMemoryBarrier barrier;
330 vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, 413 barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
331 VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, 414 barrier.pNext = nullptr;
332 static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16)); 415 barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
333 cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, 416 barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
334 vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); 417 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
418 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
419 barrier.buffer = buffer;
420 barrier.offset = 0;
421 barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16));
422 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
423 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
335 }); 424 });
336 return {&*buffer.handle, 0}; 425 return {buffer.handle.address(), 0};
337} 426}
338 427
339} // namespace Vulkan 428} // namespace Vulkan
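The 6/4 factor in QuadArrayPass::Assemble follows from quad-list expansion: each 4-vertex quad becomes two triangles, i.e. 6 indices, so num_vertices quad vertices need num_vertices * 6 / 4 triangle indices. A minimal host-side sketch of the pattern the quad_array compute shader is assumed to emit; the (0,1,2)/(0,2,3) winding is an assumption, since the SPIR-V source is outside this diff:

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch only: CPU equivalent of the assumed quad_array expansion.
std::vector<std::uint32_t> ExpandQuadList(std::uint32_t num_vertices, std::uint32_t first) {
    const std::uint32_t num_quads = num_vertices / 4; // Assemble asserts num_vertices % 4 == 0
    std::vector<std::uint32_t> indices;
    indices.reserve(static_cast<std::size_t>(num_quads) * 6);
    for (std::uint32_t quad = 0; quad < num_quads; ++quad) {
        const std::uint32_t base = first + quad * 4;
        // Two triangles per quad, matching the barrier size of
        // num_quads * 6 * sizeof(u32) recorded above.
        for (const std::uint32_t offset : {0u, 1u, 2u, 0u, 2u, 3u}) {
            indices.push_back(base + offset);
        }
    }
    return indices;
}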
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 7057eb837..c62516bff 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -8,8 +8,8 @@
8#include <utility> 8#include <utility>
9#include <vector> 9#include <vector>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 11#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
@@ -22,24 +22,24 @@ class VKUpdateDescriptorQueue;
22class VKComputePass { 22class VKComputePass {
23public: 23public:
24 explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, 24 explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
25 const std::vector<vk::DescriptorSetLayoutBinding>& bindings, 25 vk::Span<VkDescriptorSetLayoutBinding> bindings,
26 const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, 26 vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
27 const std::vector<vk::PushConstantRange> push_constants, 27 vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
28 std::size_t code_size, const u8* code); 28 const u8* code);
29 ~VKComputePass(); 29 ~VKComputePass();
30 30
31protected: 31protected:
32 vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, 32 VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
33 VKFence& fence); 33 VKFence& fence);
34 34
35 UniqueDescriptorUpdateTemplate descriptor_template; 35 vk::DescriptorUpdateTemplateKHR descriptor_template;
36 UniquePipelineLayout layout; 36 vk::PipelineLayout layout;
37 UniquePipeline pipeline; 37 vk::Pipeline pipeline;
38 38
39private: 39private:
40 UniqueDescriptorSetLayout descriptor_set_layout; 40 vk::DescriptorSetLayout descriptor_set_layout;
41 std::optional<DescriptorAllocator> descriptor_allocator; 41 std::optional<DescriptorAllocator> descriptor_allocator;
42 UniqueShaderModule module; 42 vk::ShaderModule module;
43}; 43};
44 44
45class QuadArrayPass final : public VKComputePass { 45class QuadArrayPass final : public VKComputePass {
@@ -50,7 +50,7 @@ public:
50 VKUpdateDescriptorQueue& update_descriptor_queue); 50 VKUpdateDescriptorQueue& update_descriptor_queue);
51 ~QuadArrayPass(); 51 ~QuadArrayPass();
52 52
53 std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first); 53 std::pair<const VkBuffer*, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
54 54
55private: 55private:
56 VKScheduler& scheduler; 56 VKScheduler& scheduler;
@@ -65,8 +65,7 @@ public:
65 VKUpdateDescriptorQueue& update_descriptor_queue); 65 VKUpdateDescriptorQueue& update_descriptor_queue);
66 ~Uint8Pass(); 66 ~Uint8Pass();
67 67
68 std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer, 68 std::pair<const VkBuffer*, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
69 u64 src_offset);
70 69
71private: 70private:
72 VKScheduler& scheduler; 71 VKScheduler& scheduler;
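The vk::Span parameters above replace const std::vector<T>& so constant arrays and brace lists reach VKComputePass without a heap allocation. The real definition lives in wrapper.h, which this diff does not show; the following is only a sketch of the assumed shape, a non-owning pointer-plus-size view:

#include <cstddef>
#include <initializer_list>

// Sketch of an assumed vk::Span-like view; not the wrapper's actual code.
template <typename T>
class Span {
public:
    constexpr Span() noexcept = default;
    constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {}
    // Only valid for the full-expression of the call, like any initializer_list view.
    constexpr Span(std::initializer_list<T> list) noexcept
        : ptr{list.begin()}, num{list.size()} {}
    template <std::size_t N>
    constexpr Span(const T (&array)[N]) noexcept : ptr{array}, num{N} {}

    constexpr const T* data() const noexcept { return ptr; }
    constexpr std::size_t size() const noexcept { return num; }

private:
    const T* ptr = nullptr;
    std::size_t num = 0;
};

As with any non-owning view, the viewed storage must outlive the call; the Build* helpers at the constructor call sites satisfy that for the duration of the constructor expression.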
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 60f57d83e..23beafa4f 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -5,7 +5,6 @@
5#include <memory> 5#include <memory>
6#include <vector> 6#include <vector>
7 7
8#include "video_core/renderer_vulkan/declarations.h"
9#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 8#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
10#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 9#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
11#include "video_core/renderer_vulkan/vk_device.h" 10#include "video_core/renderer_vulkan/vk_device.h"
@@ -14,6 +13,7 @@
14#include "video_core/renderer_vulkan/vk_scheduler.h" 13#include "video_core/renderer_vulkan/vk_scheduler.h"
15#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 14#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
16#include "video_core/renderer_vulkan/vk_update_descriptor.h" 15#include "video_core/renderer_vulkan/vk_update_descriptor.h"
16#include "video_core/renderer_vulkan/wrapper.h"
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
@@ -30,7 +30,7 @@ VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& schedu
30 30
31VKComputePipeline::~VKComputePipeline() = default; 31VKComputePipeline::~VKComputePipeline() = default;
32 32
33vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { 33VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
34 if (!descriptor_template) { 34 if (!descriptor_template) {
35 return {}; 35 return {};
36 } 36 }
@@ -39,74 +39,109 @@ vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() {
39 return set; 39 return set;
40} 40}
41 41
42UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { 42vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
43 std::vector<vk::DescriptorSetLayoutBinding> bindings; 43 std::vector<VkDescriptorSetLayoutBinding> bindings;
44 u32 binding = 0; 44 u32 binding = 0;
45 const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { 45 const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) {
46 // TODO(Rodrigo): Maybe make individual bindings here? 46 // TODO(Rodrigo): Maybe make individual bindings here?
47 for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { 47 for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) {
48 bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute, 48 VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
49 nullptr); 49 entry.binding = binding++;
50 entry.descriptorType = descriptor_type;
51 entry.descriptorCount = 1;
52 entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
53 entry.pImmutableSamplers = nullptr;
50 } 54 }
51 }; 55 };
52 AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); 56 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
53 AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); 57 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
54 AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); 58 add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size());
55 AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); 59 add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
56 AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); 60 add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());
57 61
58 const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( 62 VkDescriptorSetLayoutCreateInfo ci;
59 {}, static_cast<u32>(bindings.size()), bindings.data()); 63 ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
60 64 ci.pNext = nullptr;
61 const auto dev = device.GetLogical(); 65 ci.flags = 0;
62 const auto& dld = device.GetDispatchLoader(); 66 ci.bindingCount = static_cast<u32>(bindings.size());
63 return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); 67 ci.pBindings = bindings.data();
68 return device.GetLogical().CreateDescriptorSetLayout(ci);
64} 69}
65 70
66UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const { 71vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
67 const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr); 72 VkPipelineLayoutCreateInfo ci;
68 const auto dev = device.GetLogical(); 73 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
69 return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader()); 74 ci.pNext = nullptr;
75 ci.flags = 0;
76 ci.setLayoutCount = 1;
77 ci.pSetLayouts = descriptor_set_layout.address();
78 ci.pushConstantRangeCount = 0;
79 ci.pPushConstantRanges = nullptr;
80 return device.GetLogical().CreatePipelineLayout(ci);
70} 81}
71 82
72UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const { 83vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
73 std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; 84 std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
74 u32 binding = 0; 85 u32 binding = 0;
75 u32 offset = 0; 86 u32 offset = 0;
76 FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); 87 FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
77 if (template_entries.empty()) { 88 if (template_entries.empty()) {
78 // If the shader doesn't use descriptor sets, skip template creation. 89 // If the shader doesn't use descriptor sets, skip template creation.
79 return UniqueDescriptorUpdateTemplate{}; 90 return {};
80 } 91 }
81 92
82 const vk::DescriptorUpdateTemplateCreateInfo template_ci( 93 VkDescriptorUpdateTemplateCreateInfoKHR ci;
83 {}, static_cast<u32>(template_entries.size()), template_entries.data(), 94 ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
84 vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, 95 ci.pNext = nullptr;
85 vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); 96 ci.flags = 0;
86 97 ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size());
87 const auto dev = device.GetLogical(); 98 ci.pDescriptorUpdateEntries = template_entries.data();
88 const auto& dld = device.GetDispatchLoader(); 99 ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
89 return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); 100 ci.descriptorSetLayout = *descriptor_set_layout;
101 ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
102 ci.pipelineLayout = *layout;
103 ci.set = DESCRIPTOR_SET;
104 return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
90} 105}
91 106
92UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { 107vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
93 const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data()); 108 VkShaderModuleCreateInfo ci;
94 const auto dev = device.GetLogical(); 109 ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
95 return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader()); 110 ci.pNext = nullptr;
111 ci.flags = 0;
112 ci.codeSize = code.size() * sizeof(u32);
113 ci.pCode = code.data();
114 return device.GetLogical().CreateShaderModule(ci);
96} 115}
97 116
98UniquePipeline VKComputePipeline::CreatePipeline() const { 117vk::Pipeline VKComputePipeline::CreatePipeline() const {
99 vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute, 118 VkComputePipelineCreateInfo ci;
100 *shader_module, "main", nullptr); 119 VkPipelineShaderStageCreateInfo& stage_ci = ci.stage;
101 vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; 120 stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
121 stage_ci.pNext = nullptr;
122 stage_ci.flags = 0;
123 stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT;
124 stage_ci.module = *shader_module;
125 stage_ci.pName = "main";
126 stage_ci.pSpecializationInfo = nullptr;
127
128 VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
129 subgroup_size_ci.sType =
130 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT;
131 subgroup_size_ci.pNext = nullptr;
102 subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; 132 subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;
103 if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) { 133
104 shader_stage_ci.pNext = &subgroup_size_ci; 134 if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) {
135 stage_ci.pNext = &subgroup_size_ci;
105 } 136 }
106 137
107 const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0); 138 ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
108 const auto dev = device.GetLogical(); 139 ci.pNext = nullptr;
109 return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader()); 140 ci.flags = 0;
141 ci.layout = *layout;
142 ci.basePipelineHandle = nullptr;
143 ci.basePipelineIndex = 0;
144 return device.GetLogical().CreateComputePipeline(ci);
110} 145}
111 146
112} // namespace Vulkan 147} // namespace Vulkan
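For context on CreateDescriptorUpdateTemplate above: each template entry is created with a fixed sizeof(DescriptorUpdateEntry) stride, so at dispatch time a single vkUpdateDescriptorSetWithTemplateKHR call can consume one flat payload array. A hedged sketch of that consumer side; the union shown here is an assumption about the update queue's payload slot, which is defined outside this diff:

#include <vulkan/vulkan.h>

// Assumed payload slot; the real type lives next to VKUpdateDescriptorQueue.
union DescriptorUpdateEntry {
    VkDescriptorBufferInfo buffer;
    VkDescriptorImageInfo image;
    VkBufferView texel_buffer;
};

// One call updates every binding: the entries all share the
// sizeof(DescriptorUpdateEntry) stride, so the template walks the payload array.
void UpdateWithTemplate(PFN_vkUpdateDescriptorSetWithTemplateKHR update_fn, VkDevice device,
                        VkDescriptorSet set, VkDescriptorUpdateTemplateKHR descriptor_template,
                        const DescriptorUpdateEntry* payload) {
    update_fn(device, set, descriptor_template, payload);
}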
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index 22235c6c9..33b9af29e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -7,9 +7,9 @@
7#include <memory> 7#include <memory>
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/renderer_vulkan/declarations.h"
11#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 10#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
12#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 11#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
@@ -25,42 +25,42 @@ public:
25 const SPIRVShader& shader); 25 const SPIRVShader& shader);
26 ~VKComputePipeline(); 26 ~VKComputePipeline();
27 27
28 vk::DescriptorSet CommitDescriptorSet(); 28 VkDescriptorSet CommitDescriptorSet();
29 29
30 vk::Pipeline GetHandle() const { 30 VkPipeline GetHandle() const {
31 return *pipeline; 31 return *pipeline;
32 } 32 }
33 33
34 vk::PipelineLayout GetLayout() const { 34 VkPipelineLayout GetLayout() const {
35 return *layout; 35 return *layout;
36 } 36 }
37 37
38 const ShaderEntries& GetEntries() { 38 const ShaderEntries& GetEntries() const {
39 return entries; 39 return entries;
40 } 40 }
41 41
42private: 42private:
43 UniqueDescriptorSetLayout CreateDescriptorSetLayout() const; 43 vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
44 44
45 UniquePipelineLayout CreatePipelineLayout() const; 45 vk::PipelineLayout CreatePipelineLayout() const;
46 46
47 UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const; 47 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
48 48
49 UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const; 49 vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
50 50
51 UniquePipeline CreatePipeline() const; 51 vk::Pipeline CreatePipeline() const;
52 52
53 const VKDevice& device; 53 const VKDevice& device;
54 VKScheduler& scheduler; 54 VKScheduler& scheduler;
55 ShaderEntries entries; 55 ShaderEntries entries;
56 56
57 UniqueDescriptorSetLayout descriptor_set_layout; 57 vk::DescriptorSetLayout descriptor_set_layout;
58 DescriptorAllocator descriptor_allocator; 58 DescriptorAllocator descriptor_allocator;
59 VKUpdateDescriptorQueue& update_descriptor_queue; 59 VKUpdateDescriptorQueue& update_descriptor_queue;
60 UniquePipelineLayout layout; 60 vk::PipelineLayout layout;
61 UniqueDescriptorUpdateTemplate descriptor_template; 61 vk::DescriptorUpdateTemplateKHR descriptor_template;
62 UniqueShaderModule shader_module; 62 vk::ShaderModule shader_module;
63 UniquePipeline pipeline; 63 vk::Pipeline pipeline;
64}; 64};
65 65
66} // namespace Vulkan 66} // namespace Vulkan
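The Unique* members above came from vulkan.hpp's vk::UniqueHandle; their replacements (vk::Pipeline, vk::ShaderModule, and so on) are the project's own owners from wrapper.h. Such a wrapper is typically a move-only type that destroys the handle when it goes out of scope; a generic sketch of the pattern with illustrative names, not the actual wrapper interface (the real one also threads the loaded dispatch table into the destroy call):

#include <utility>
#include <vulkan/vulkan.h>

// Illustrative RAII owner for a single Vulkan handle.
class OwnedShaderModule {
public:
    OwnedShaderModule() = default;
    OwnedShaderModule(VkDevice device_, VkShaderModule handle_)
        : device{device_}, handle{handle_} {}
    OwnedShaderModule(OwnedShaderModule&& rhs) noexcept
        : device{rhs.device}, handle{std::exchange(rhs.handle, VkShaderModule{})} {}
    OwnedShaderModule& operator=(OwnedShaderModule&& rhs) noexcept {
        Release();
        device = rhs.device;
        handle = std::exchange(rhs.handle, VkShaderModule{});
        return *this;
    }
    OwnedShaderModule(const OwnedShaderModule&) = delete;
    OwnedShaderModule& operator=(const OwnedShaderModule&) = delete;
    ~OwnedShaderModule() {
        Release();
    }

    VkShaderModule operator*() const {
        return handle; // mirrors the *shader_module dereferences in the diff
    }

private:
    void Release() {
        if (handle) {
            vkDestroyShaderModule(device, handle, nullptr);
        }
    }

    VkDevice device{};
    VkShaderModule handle{};
};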
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index cc7c281a0..e9d528aa6 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -6,10 +6,10 @@
6#include <vector> 6#include <vector>
7 7
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 9#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
11#include "video_core/renderer_vulkan/vk_device.h" 10#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/renderer_vulkan/vk_resource_manager.h" 11#include "video_core/renderer_vulkan/vk_resource_manager.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
@@ -17,19 +17,18 @@ namespace Vulkan {
17constexpr std::size_t SETS_GROW_RATE = 0x20; 17constexpr std::size_t SETS_GROW_RATE = 0x20;
18 18
19DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, 19DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
20 vk::DescriptorSetLayout layout) 20 VkDescriptorSetLayout layout)
21 : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} 21 : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
22 22
23DescriptorAllocator::~DescriptorAllocator() = default; 23DescriptorAllocator::~DescriptorAllocator() = default;
24 24
25vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) { 25VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
26 return *descriptors[CommitResource(fence)]; 26 const std::size_t index = CommitResource(fence);
27 return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
27} 28}
28 29
29void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { 30void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
30 auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin); 31 descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
31 descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()),
32 std::make_move_iterator(new_sets.end()));
33} 32}
34 33
35VKDescriptorPool::VKDescriptorPool(const VKDevice& device) 34VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
@@ -37,53 +36,50 @@ VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
37 36
38VKDescriptorPool::~VKDescriptorPool() = default; 37VKDescriptorPool::~VKDescriptorPool() = default;
39 38
40vk::DescriptorPool VKDescriptorPool::AllocateNewPool() { 39vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() {
41 static constexpr u32 num_sets = 0x20000; 40 static constexpr u32 num_sets = 0x20000;
42 static constexpr vk::DescriptorPoolSize pool_sizes[] = { 41 static constexpr VkDescriptorPoolSize pool_sizes[] = {
43 {vk::DescriptorType::eUniformBuffer, num_sets * 90}, 42 {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90},
44 {vk::DescriptorType::eStorageBuffer, num_sets * 60}, 43 {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60},
45 {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64}, 44 {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64},
46 {vk::DescriptorType::eCombinedImageSampler, num_sets * 64}, 45 {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64},
47 {vk::DescriptorType::eStorageImage, num_sets * 40}}; 46 {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}};
48 47
49 const vk::DescriptorPoolCreateInfo create_info( 48 VkDescriptorPoolCreateInfo ci;
50 vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets, 49 ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
51 static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes)); 50 ci.pNext = nullptr;
52 const auto dev = device.GetLogical(); 51 ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
53 return *pools.emplace_back( 52 ci.maxSets = num_sets;
54 dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader())); 53 ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes));
54 ci.pPoolSizes = std::data(pool_sizes);
55 return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci));
55} 56}
56 57
57std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors( 58vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout,
58 vk::DescriptorSetLayout layout, std::size_t count) { 59 std::size_t count) {
59 std::vector layout_copies(count, layout); 60 const std::vector layout_copies(count, layout);
60 vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count), 61 VkDescriptorSetAllocateInfo ai;
61 layout_copies.data()); 62 ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
62 63 ai.pNext = nullptr;
63 std::vector<vk::DescriptorSet> sets(count); 64 ai.descriptorPool = **active_pool;
64 const auto dev = device.GetLogical(); 65 ai.descriptorSetCount = static_cast<u32>(count);
65 const auto& dld = device.GetDispatchLoader(); 66 ai.pSetLayouts = layout_copies.data();
66 switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) { 67
67 case vk::Result::eSuccess: 68 vk::DescriptorSets sets = active_pool->Allocate(ai);
68 break; 69 if (!sets.IsOutOfPoolMemory()) {
69 case vk::Result::eErrorOutOfPoolMemory: 70 return sets;
70 active_pool = AllocateNewPool();
71 allocate_info.descriptorPool = active_pool;
72 if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) {
73 break;
74 }
75 [[fallthrough]];
76 default:
77 vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique");
78 } 71 }
79 72
80 vk::PoolFree deleter(dev, active_pool, dld); 73 // Our current pool is out of memory. Allocate a new one and retry
81 std::vector<UniqueDescriptorSet> unique_sets; 74 active_pool = AllocateNewPool();
82 unique_sets.reserve(count); 75 ai.descriptorPool = **active_pool;
83 for (const auto set : sets) { 76 sets = active_pool->Allocate(ai);
84 unique_sets.push_back(UniqueDescriptorSet{set, deleter}); 77 if (!sets.IsOutOfPoolMemory()) {
78 return sets;
85 } 79 }
86 return unique_sets; 80
81 // After allocating a new pool, we are out of memory again. We can't handle this from here.
82 throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY);
87} 83}
88 84
89} // namespace Vulkan 85} // namespace Vulkan
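The two-level lookup in DescriptorAllocator::Commit above leans on an invariant inherited from VKFencedPool: Allocate is always invoked in steps of exactly SETS_GROW_RATE, so allocation chunk i covers indices [i * SETS_GROW_RATE, (i + 1) * SETS_GROW_RATE). A sketch of the arithmetic under that assumption, with std::vector standing in for vk::DescriptorSets:

#include <cstddef>
#include <vector>

#include <vulkan/vulkan.h>

constexpr std::size_t SETS_GROW_RATE = 0x20;

// index / SETS_GROW_RATE selects the chunk pushed by one Allocate() call;
// index % SETS_GROW_RATE selects the set inside that chunk.
VkDescriptorSet LookUp(const std::vector<std::vector<VkDescriptorSet>>& allocations,
                       std::size_t index) {
    return allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}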
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index a441dbc0f..ab40c70f0 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -8,8 +8,8 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_resource_manager.h" 11#include "video_core/renderer_vulkan/vk_resource_manager.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
@@ -17,21 +17,21 @@ class VKDescriptorPool;
17 17
18class DescriptorAllocator final : public VKFencedPool { 18class DescriptorAllocator final : public VKFencedPool {
19public: 19public:
20 explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout); 20 explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
21 ~DescriptorAllocator() override; 21 ~DescriptorAllocator() override;
22 22
23 DescriptorAllocator(const DescriptorAllocator&) = delete; 23 DescriptorAllocator(const DescriptorAllocator&) = delete;
24 24
25 vk::DescriptorSet Commit(VKFence& fence); 25 VkDescriptorSet Commit(VKFence& fence);
26 26
27protected: 27protected:
28 void Allocate(std::size_t begin, std::size_t end) override; 28 void Allocate(std::size_t begin, std::size_t end) override;
29 29
30private: 30private:
31 VKDescriptorPool& descriptor_pool; 31 VKDescriptorPool& descriptor_pool;
32 const vk::DescriptorSetLayout layout; 32 const VkDescriptorSetLayout layout;
33 33
34 std::vector<UniqueDescriptorSet> descriptors; 34 std::vector<vk::DescriptorSets> descriptors_allocations;
35}; 35};
36 36
37class VKDescriptorPool final { 37class VKDescriptorPool final {
@@ -42,15 +42,14 @@ public:
42 ~VKDescriptorPool(); 42 ~VKDescriptorPool();
43 43
44private: 44private:
45 vk::DescriptorPool AllocateNewPool(); 45 vk::DescriptorPool* AllocateNewPool();
46 46
47 std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout, 47 vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
48 std::size_t count);
49 48
50 const VKDevice& device; 49 const VKDevice& device;
51 50
52 std::vector<UniqueDescriptorPool> pools; 51 std::vector<vk::DescriptorPool> pools;
53 vk::DescriptorPool active_pool; 52 vk::DescriptorPool* active_pool;
54}; 53};
55 54
56} // namespace Vulkan \ No newline at end of file 55} // namespace Vulkan \ No newline at end of file
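On active_pool becoming vk::DescriptorPool* above: the wrapper's pool type is an RAII owner, so VKDescriptorPool keeps the owners inside the pools vector and merely points at the one currently allocating. That pointer would dangle when the vector reallocates, which the code avoids by reassigning it from AllocateNewPool on every growth. A minimal illustration of that discipline (illustrative names only):

#include <vector>

struct Pool {}; // stands in for a move-only RAII pool owner

class PoolChain {
public:
    Pool* Grow() {
        // emplace_back may reallocate and invalidate earlier element pointers,
        // so the single cached pointer is refreshed in the same statement.
        active = &pools.emplace_back();
        return active;
    }

private:
    std::vector<Pool> pools;
    Pool* active = nullptr;
};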
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 28d2fbc4f..52d29e49d 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -6,14 +6,15 @@
6#include <chrono> 6#include <chrono>
7#include <cstdlib> 7#include <cstdlib>
8#include <optional> 8#include <optional>
9#include <set>
10#include <string_view> 9#include <string_view>
11#include <thread> 10#include <thread>
11#include <unordered_set>
12#include <vector> 12#include <vector>
13
13#include "common/assert.h" 14#include "common/assert.h"
14#include "core/settings.h" 15#include "core/settings.h"
15#include "video_core/renderer_vulkan/declarations.h"
16#include "video_core/renderer_vulkan/vk_device.h" 16#include "video_core/renderer_vulkan/vk_device.h"
17#include "video_core/renderer_vulkan/wrapper.h"
17 18
18namespace Vulkan { 19namespace Vulkan {
19 20
@@ -21,49 +22,43 @@ namespace {
21 22
22namespace Alternatives { 23namespace Alternatives {
23 24
24constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint, 25constexpr std::array Depth24UnormS8_UINT = {VK_FORMAT_D32_SFLOAT_S8_UINT,
25 vk::Format::eD16UnormS8Uint, vk::Format{}}; 26 VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}};
26constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint, 27constexpr std::array Depth16UnormS8_UINT = {VK_FORMAT_D24_UNORM_S8_UINT,
27 vk::Format::eD32SfloatS8Uint, vk::Format{}}; 28 VK_FORMAT_D32_SFLOAT_S8_UINT, VkFormat{}};
28 29
29} // namespace Alternatives 30} // namespace Alternatives
30 31
32constexpr std::array REQUIRED_EXTENSIONS = {
33 VK_KHR_SWAPCHAIN_EXTENSION_NAME,
34 VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
35 VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
36 VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
37 VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
38 VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
39 VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
40 VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
41 VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
42};
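REQUIRED_EXTENSIONS above is the old per-function required_extensions array hoisted to namespace scope, extended with VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME (presumably because the wrapper now calls the KHR update-template entry points directly), so IsSuitable below no longer builds the list locally. The membership test it performs reduces to a single predicate; a sketch:

#include <algorithm>
#include <string_view>
#include <vector>

#include <vulkan/vulkan.h>

// True when EnumerateDeviceExtensionProperties reported the named extension.
bool HasExtension(const std::vector<VkExtensionProperties>& available, std::string_view name) {
    return std::any_of(available.begin(), available.end(),
                       [name](const VkExtensionProperties& prop) {
                           return name == prop.extensionName;
                       });
}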
43
31template <typename T> 44template <typename T>
32void SetNext(void**& next, T& data) { 45void SetNext(void**& next, T& data) {
33 *next = &data; 46 *next = &data;
34 next = &data.pNext; 47 next = &data.pNext;
35} 48}
36 49
37template <typename T> 50constexpr const VkFormat* GetFormatAlternatives(VkFormat format) {
38T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
39 vk::PhysicalDeviceFeatures2 features;
40 T extension_features;
41 features.pNext = &extension_features;
42 physical.getFeatures2(&features, dldi);
43 return extension_features;
44}
45
46template <typename T>
47T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) {
48 vk::PhysicalDeviceProperties2 properties;
49 T extension_properties;
50 properties.pNext = &extension_properties;
51 physical.getProperties2(&properties, dldi);
52 return extension_properties;
53}
54
55constexpr const vk::Format* GetFormatAlternatives(vk::Format format) {
56 switch (format) { 51 switch (format) {
57 case vk::Format::eD24UnormS8Uint: 52 case VK_FORMAT_D24_UNORM_S8_UINT:
58 return Alternatives::Depth24UnormS8Uint.data(); 53 return Alternatives::Depth24UnormS8_UINT.data();
59 case vk::Format::eD16UnormS8Uint: 54 case VK_FORMAT_D16_UNORM_S8_UINT:
60 return Alternatives::Depth16UnormS8Uint.data(); 55 return Alternatives::Depth16UnormS8_UINT.data();
61 default: 56 default:
62 return nullptr; 57 return nullptr;
63 } 58 }
64} 59}
65 60
66vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) { 61VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType format_type) {
67 switch (format_type) { 62 switch (format_type) {
68 case FormatType::Linear: 63 case FormatType::Linear:
69 return properties.linearTilingFeatures; 64 return properties.linearTilingFeatures;
@@ -76,79 +71,220 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format
76 } 71 }
77} 72}
78 73
74std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
75 vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
76 static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32,
77 VK_FORMAT_A8B8G8R8_UINT_PACK32,
78 VK_FORMAT_A8B8G8R8_SNORM_PACK32,
79 VK_FORMAT_A8B8G8R8_SRGB_PACK32,
80 VK_FORMAT_B5G6R5_UNORM_PACK16,
81 VK_FORMAT_A2B10G10R10_UNORM_PACK32,
82 VK_FORMAT_A1R5G5B5_UNORM_PACK16,
83 VK_FORMAT_R32G32B32A32_SFLOAT,
84 VK_FORMAT_R32G32B32A32_UINT,
85 VK_FORMAT_R32G32_SFLOAT,
86 VK_FORMAT_R32G32_UINT,
87 VK_FORMAT_R16G16B16A16_UINT,
88 VK_FORMAT_R16G16B16A16_SNORM,
89 VK_FORMAT_R16G16B16A16_UNORM,
90 VK_FORMAT_R16G16_UNORM,
91 VK_FORMAT_R16G16_SNORM,
92 VK_FORMAT_R16G16_SFLOAT,
93 VK_FORMAT_R16_UNORM,
94 VK_FORMAT_R8G8B8A8_SRGB,
95 VK_FORMAT_R8G8_UNORM,
96 VK_FORMAT_R8G8_SNORM,
97 VK_FORMAT_R8_UNORM,
98 VK_FORMAT_R8_UINT,
99 VK_FORMAT_B10G11R11_UFLOAT_PACK32,
100 VK_FORMAT_R32_SFLOAT,
101 VK_FORMAT_R32_UINT,
102 VK_FORMAT_R32_SINT,
103 VK_FORMAT_R16_SFLOAT,
104 VK_FORMAT_R16G16B16A16_SFLOAT,
105 VK_FORMAT_B8G8R8A8_UNORM,
106 VK_FORMAT_R4G4B4A4_UNORM_PACK16,
107 VK_FORMAT_D32_SFLOAT,
108 VK_FORMAT_D16_UNORM,
109 VK_FORMAT_D16_UNORM_S8_UINT,
110 VK_FORMAT_D24_UNORM_S8_UINT,
111 VK_FORMAT_D32_SFLOAT_S8_UINT,
112 VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
113 VK_FORMAT_BC2_UNORM_BLOCK,
114 VK_FORMAT_BC3_UNORM_BLOCK,
115 VK_FORMAT_BC4_UNORM_BLOCK,
116 VK_FORMAT_BC5_UNORM_BLOCK,
117 VK_FORMAT_BC5_SNORM_BLOCK,
118 VK_FORMAT_BC7_UNORM_BLOCK,
119 VK_FORMAT_BC6H_UFLOAT_BLOCK,
120 VK_FORMAT_BC6H_SFLOAT_BLOCK,
121 VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
122 VK_FORMAT_BC2_SRGB_BLOCK,
123 VK_FORMAT_BC3_SRGB_BLOCK,
124 VK_FORMAT_BC7_SRGB_BLOCK,
125 VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
126 VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
127 VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
128 VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
129 VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
130 VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
131 VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
132 VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
133 VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
134 VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
135 VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
136 VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
137 VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
138 VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
139 VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
140 VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
141 VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
142 VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
143 VK_FORMAT_E5B9G9R9_UFLOAT_PACK32};
144 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
145 for (const auto format : formats) {
146 format_properties.emplace(format, physical.GetFormatProperties(format));
147 }
148 return format_properties;
149}
150
79} // Anonymous namespace 151} // Anonymous namespace
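GetFormatProperties above front-loads one driver query per format the renderer cares about; VKDevice then answers later lookups from the cached map instead of calling vkGetPhysicalDeviceFormatProperties repeatedly. A sketch of the cached read path (the real IsFormatSupported, further down, logs UNIMPLEMENTED and returns true for formats missing from the table):

#include <unordered_map>

#include <vulkan/vulkan.h>

// Sketch: answer optimal-tiling feature queries from the prebuilt cache.
bool SupportsOptimal(const std::unordered_map<VkFormat, VkFormatProperties>& cache,
                     VkFormat format, VkFormatFeatureFlags wanted) {
    const auto it = cache.find(format);
    if (it == cache.end()) {
        return false; // unlike the real code, treat unknown formats as unsupported
    }
    return (it->second.optimalTilingFeatures & wanted) == wanted;
}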
80 152
81VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, 153VKDevice::VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
82 vk::SurfaceKHR surface) 154 const vk::InstanceDispatch& dld)
83 : physical{physical}, properties{physical.getProperties(dldi)}, 155 : dld{dld}, physical{physical}, properties{physical.GetProperties()},
84 format_properties{GetFormatProperties(dldi, physical)} { 156 format_properties{GetFormatProperties(physical, dld)} {
85 SetupFamilies(dldi, surface); 157 SetupFamilies(surface);
86 SetupFeatures(dldi); 158 SetupFeatures();
87} 159}
88 160
89VKDevice::~VKDevice() = default; 161VKDevice::~VKDevice() = default;
90 162
91bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { 163bool VKDevice::Create() {
92 const auto queue_cis = GetDeviceQueueCreateInfos(); 164 const auto queue_cis = GetDeviceQueueCreateInfos();
93 const std::vector extensions = LoadExtensions(dldi); 165 const std::vector extensions = LoadExtensions();
94 166
95 vk::PhysicalDeviceFeatures2 features2; 167 VkPhysicalDeviceFeatures2 features2;
168 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
169 features2.pNext = nullptr;
96 void** next = &features2.pNext; 170 void** next = &features2.pNext;
171
97 auto& features = features2.features; 172 auto& features = features2.features;
98 features.vertexPipelineStoresAndAtomics = true; 173 features.robustBufferAccess = false;
174 features.fullDrawIndexUint32 = false;
175 features.imageCubeArray = false;
99 features.independentBlend = true; 176 features.independentBlend = true;
177 features.geometryShader = true;
178 features.tessellationShader = true;
179 features.sampleRateShading = false;
180 features.dualSrcBlend = false;
181 features.logicOp = false;
182 features.multiDrawIndirect = false;
183 features.drawIndirectFirstInstance = false;
100 features.depthClamp = true; 184 features.depthClamp = true;
101 features.samplerAnisotropy = true; 185 features.depthBiasClamp = true;
186 features.fillModeNonSolid = false;
187 features.depthBounds = false;
188 features.wideLines = false;
102 features.largePoints = true; 189 features.largePoints = true;
190 features.alphaToOne = false;
103 features.multiViewport = true; 191 features.multiViewport = true;
104 features.depthBiasClamp = true; 192 features.samplerAnisotropy = true;
105 features.geometryShader = true; 193 features.textureCompressionETC2 = false;
106 features.tessellationShader = true; 194 features.textureCompressionASTC_LDR = is_optimal_astc_supported;
195 features.textureCompressionBC = false;
107 features.occlusionQueryPrecise = true; 196 features.occlusionQueryPrecise = true;
197 features.pipelineStatisticsQuery = false;
198 features.vertexPipelineStoresAndAtomics = true;
108 features.fragmentStoresAndAtomics = true; 199 features.fragmentStoresAndAtomics = true;
200 features.shaderTessellationAndGeometryPointSize = false;
109 features.shaderImageGatherExtended = true; 201 features.shaderImageGatherExtended = true;
202 features.shaderStorageImageExtendedFormats = false;
203 features.shaderStorageImageMultisample = false;
110 features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; 204 features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
111 features.shaderStorageImageWriteWithoutFormat = true; 205 features.shaderStorageImageWriteWithoutFormat = true;
112 features.textureCompressionASTC_LDR = is_optimal_astc_supported; 206 features.shaderUniformBufferArrayDynamicIndexing = false;
113 207 features.shaderSampledImageArrayDynamicIndexing = false;
114 vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage; 208 features.shaderStorageBufferArrayDynamicIndexing = false;
209 features.shaderStorageImageArrayDynamicIndexing = false;
210 features.shaderClipDistance = false;
211 features.shaderCullDistance = false;
212 features.shaderFloat64 = false;
213 features.shaderInt64 = false;
214 features.shaderInt16 = false;
215 features.shaderResourceResidency = false;
216 features.shaderResourceMinLod = false;
217 features.sparseBinding = false;
218 features.sparseResidencyBuffer = false;
219 features.sparseResidencyImage2D = false;
220 features.sparseResidencyImage3D = false;
221 features.sparseResidency2Samples = false;
222 features.sparseResidency4Samples = false;
223 features.sparseResidency8Samples = false;
224 features.sparseResidency16Samples = false;
225 features.sparseResidencyAliased = false;
226 features.variableMultisampleRate = false;
227 features.inheritedQueries = false;
228
229 VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage;
230 bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
231 bit16_storage.pNext = nullptr;
232 bit16_storage.storageBuffer16BitAccess = false;
115 bit16_storage.uniformAndStorageBuffer16BitAccess = true; 233 bit16_storage.uniformAndStorageBuffer16BitAccess = true;
234 bit16_storage.storagePushConstant16 = false;
235 bit16_storage.storageInputOutput16 = false;
116 SetNext(next, bit16_storage); 236 SetNext(next, bit16_storage);
117 237
118 vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage; 238 VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage;
239 bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR;
240 bit8_storage.pNext = nullptr;
241 bit8_storage.storageBuffer8BitAccess = false;
119 bit8_storage.uniformAndStorageBuffer8BitAccess = true; 242 bit8_storage.uniformAndStorageBuffer8BitAccess = true;
243 bit8_storage.storagePushConstant8 = false;
120 SetNext(next, bit8_storage); 244 SetNext(next, bit8_storage);
121 245
122 vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; 246 VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
247 host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT;
123 host_query_reset.hostQueryReset = true; 248 host_query_reset.hostQueryReset = true;
124 SetNext(next, host_query_reset); 249 SetNext(next, host_query_reset);
125 250
126 vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; 251 VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
127 if (is_float16_supported) { 252 if (is_float16_supported) {
253 float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
254 float16_int8.pNext = nullptr;
128 float16_int8.shaderFloat16 = true; 255 float16_int8.shaderFloat16 = true;
256 float16_int8.shaderInt8 = false;
129 SetNext(next, float16_int8); 257 SetNext(next, float16_int8);
130 } else { 258 } else {
131 LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); 259 LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
132 } 260 }
133 261
134 vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; 262 VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
135 if (khr_uniform_buffer_standard_layout) { 263 if (khr_uniform_buffer_standard_layout) {
264 std430_layout.sType =
265 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR;
266 std430_layout.pNext = nullptr;
136 std430_layout.uniformBufferStandardLayout = true; 267 std430_layout.uniformBufferStandardLayout = true;
137 SetNext(next, std430_layout); 268 SetNext(next, std430_layout);
138 } else { 269 } else {
139 LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); 270 LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
140 } 271 }
141 272
142 vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; 273 VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
143 if (ext_index_type_uint8) { 274 if (ext_index_type_uint8) {
275 index_type_uint8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT;
276 index_type_uint8.pNext = nullptr;
144 index_type_uint8.indexTypeUint8 = true; 277 index_type_uint8.indexTypeUint8 = true;
145 SetNext(next, index_type_uint8); 278 SetNext(next, index_type_uint8);
146 } else { 279 } else {
147 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); 280 LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
148 } 281 }
149 282
150 vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; 283 VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
151 if (ext_transform_feedback) { 284 if (ext_transform_feedback) {
285 transform_feedback.sType =
286 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
287 transform_feedback.pNext = nullptr;
152 transform_feedback.transformFeedback = true; 288 transform_feedback.transformFeedback = true;
153 transform_feedback.geometryStreams = true; 289 transform_feedback.geometryStreams = true;
154 SetNext(next, transform_feedback); 290 SetNext(next, transform_feedback);
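The SetNext helper defined near the top of this file threads the pNext chain tail through the optional feature structs above: each call stores the new struct at the current tail, then retargets the tail at that struct's own pNext. A compact sketch with an explicit terminator; whether the final tail is nulled here or inside vk::Device::Create is not visible in this hunk, so the closing *next = nullptr is an assumption:

#include <vulkan/vulkan.h>

template <typename T>
void SetNext(void**& next, T& data) {
    *next = &data;      // link the struct at the current chain tail
    next = &data.pNext; // future links will write this struct's pNext
}

void BuildChain(VkPhysicalDeviceFeatures2& features2,
                VkPhysicalDevice16BitStorageFeaturesKHR& bit16_storage,
                VkPhysicalDeviceHostQueryResetFeaturesEXT& host_query_reset) {
    features2.pNext = nullptr;
    void** next = &features2.pNext;
    SetNext(next, bit16_storage);    // features2.pNext -> &bit16_storage
    SetNext(next, host_query_reset); // bit16_storage.pNext -> &host_query_reset
    *next = nullptr;                 // terminate the chain at the last struct
}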
@@ -160,62 +296,48 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
160 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); 296 LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
161 } 297 }
162 298
163 vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, 299 logical = vk::Device::Create(physical, queue_cis, extensions, features2, dld);
164 nullptr, static_cast<u32>(extensions.size()), extensions.data(), 300 if (!logical) {
165 nullptr); 301 LOG_ERROR(Render_Vulkan, "Failed to create logical device");
166 device_ci.pNext = &features2;
167
168 vk::Device dummy_logical;
169 if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
170 LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
171 return false; 302 return false;
172 } 303 }
173 304
174 dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr);
175 logical = UniqueDevice(
176 dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
177
178 CollectTelemetryParameters(); 305 CollectTelemetryParameters();
179 306
180 graphics_queue = logical->getQueue(graphics_family, 0, dld); 307 graphics_queue = logical.GetQueue(graphics_family);
181 present_queue = logical->getQueue(present_family, 0, dld); 308 present_queue = logical.GetQueue(present_family);
182 return true; 309 return true;
183} 310}
184 311
185vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, 312VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
186 vk::FormatFeatureFlags wanted_usage, 313 FormatType format_type) const {
187 FormatType format_type) const {
188 if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { 314 if (IsFormatSupported(wanted_format, wanted_usage, format_type)) {
189 return wanted_format; 315 return wanted_format;
190 } 316 }
191 // The wanted format is not supported by hardware, search for alternatives 317 // The wanted format is not supported by hardware, search for alternatives
192 const vk::Format* alternatives = GetFormatAlternatives(wanted_format); 318 const VkFormat* alternatives = GetFormatAlternatives(wanted_format);
193 if (alternatives == nullptr) { 319 if (alternatives == nullptr) {
194 UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " 320 UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host "
195 "hardware does not support it", 321 "hardware does not support it",
196 vk::to_string(wanted_format), vk::to_string(wanted_usage), 322 wanted_format, wanted_usage, format_type);
197 static_cast<u32>(format_type));
198 return wanted_format; 323 return wanted_format;
199 } 324 }
200 325
201 std::size_t i = 0; 326 std::size_t i = 0;
202 for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; 327 for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) {
203 alternative = alternatives[++i]) {
204 if (!IsFormatSupported(alternative, wanted_usage, format_type)) { 328 if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
205 continue; 329 continue;
206 } 330 }
207 LOG_WARNING(Render_Vulkan, 331 LOG_WARNING(Render_Vulkan,
208 "Emulating format={} with alternative format={} with usage={} and type={}", 332 "Emulating format={} with alternative format={} with usage={} and type={}",
209 static_cast<u32>(wanted_format), static_cast<u32>(alternative), 333 wanted_format, alternative, wanted_usage, format_type);
210 static_cast<u32>(wanted_usage), static_cast<u32>(format_type));
211 return alternative; 334 return alternative;
212 } 335 }
213 336
214 // No alternatives found, panic 337 // No alternatives found, panic
215 UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " 338 UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and "
216 "doesn't support any of the alternatives", 339 "doesn't support any of the alternatives",
217 static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), 340 wanted_format, wanted_usage, format_type);
218 static_cast<u32>(format_type));
219 return wanted_format; 341 return wanted_format;
220} 342}
221 343
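GetSupportedFormat above walks the VkFormat{}-terminated list from GetFormatAlternatives until an entry passes IsFormatSupported. A hypothetical call site, not code from this commit: on hardware without D24S8 (common on AMD), a depth-stencil request degrades to D32S8 per Alternatives::Depth24UnormS8_UINT. FormatType::Optimal is assumed to be the enumerator for optimal-tiling queries:

// Hypothetical usage; assumes #include "video_core/renderer_vulkan/vk_device.h".
// Logs the "Emulating format=..." warning when it falls back.
VkFormat PickDepthStencilFormat(const VKDevice& device) {
    return device.GetSupportedFormat(VK_FORMAT_D24_UNORM_S8_UINT,
                                     VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                     FormatType::Optimal);
}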
@@ -229,35 +351,39 @@ void VKDevice::ReportLoss() const {
229 return; 351 return;
230 } 352 }
231 353
232 [[maybe_unused]] const std::vector data = graphics_queue.getCheckpointDataNV(dld); 354 [[maybe_unused]] const std::vector data = graphics_queue.GetCheckpointDataNV(dld);
233 // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be 355 // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be
234 // executed. It can be done on a debugger by evaluating the expression: 356 // executed. It can be done on a debugger by evaluating the expression:
235 // *(VKGraphicsPipeline*)data[0] 357 // *(VKGraphicsPipeline*)data[0]
236} 358}
237 359
238bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, 360bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const {
239 const vk::DispatchLoaderDynamic& dldi) const {
240 // Disable for now to avoid converting ASTC twice. 361 // Disable for now to avoid converting ASTC twice.
241 return false;
242 static constexpr std::array astc_formats = { 362 static constexpr std::array astc_formats = {
243 vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock, 363 VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
244 vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock, 364 VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
245 vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, 365 VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
246 vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, 366 VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
247 vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, 367 VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
248 vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, 368 VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
249 vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock, 369 VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
250 vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock, 370 VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
251 vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock}; 371 VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
372 VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
373 VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
374 VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
375 VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
376 VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
377 };
252 if (!features.textureCompressionASTC_LDR) { 378 if (!features.textureCompressionASTC_LDR) {
253 return false; 379 return false;
254 } 380 }
255 const auto format_feature_usage{ 381 const auto format_feature_usage{
256 vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | 382 VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT |
257 vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | 383 VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
258 vk::FormatFeatureFlagBits::eTransferDst}; 384 VK_FORMAT_FEATURE_TRANSFER_DST_BIT};
259 for (const auto format : astc_formats) { 385 for (const auto format : astc_formats) {
260 const auto format_properties{physical.getFormatProperties(format, dldi)}; 386 const auto format_properties{physical.GetFormatProperties(format)};
261 if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { 387 if (!(format_properties.optimalTilingFeatures & format_feature_usage)) {
262 return false; 388 return false;
263 } 389 }
@@ -265,62 +391,49 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
265 return true; 391 return true;
266} 392}
267 393
268bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, 394bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
269 FormatType format_type) const { 395 FormatType format_type) const {
270 const auto it = format_properties.find(wanted_format); 396 const auto it = format_properties.find(wanted_format);
271 if (it == format_properties.end()) { 397 if (it == format_properties.end()) {
272 UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format)); 398 UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format);
273 return true; 399 return true;
274 } 400 }
275 const auto supported_usage = GetFormatFeatures(it->second, format_type); 401 const auto supported_usage = GetFormatFeatures(it->second, format_type);
276 return (supported_usage & wanted_usage) == wanted_usage; 402 return (supported_usage & wanted_usage) == wanted_usage;
277} 403}

-bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
-                          vk::SurfaceKHR surface) {
+bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
     bool is_suitable = true;
+    std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions;

-    constexpr std::array required_extensions = {
-        VK_KHR_SWAPCHAIN_EXTENSION_NAME,
-        VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
-        VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
-        VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
-        VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
-        VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
-        VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
-        VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
-    };
-    std::bitset<required_extensions.size()> available_extensions{};
-
-    for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
-        for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+    for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) {
+        for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
             if (available_extensions[i]) {
                 continue;
             }
-            available_extensions[i] =
-                required_extensions[i] == std::string_view{prop.extensionName};
+            const std::string_view name{prop.extensionName};
+            available_extensions[i] = name == REQUIRED_EXTENSIONS[i];
         }
     }
     if (!available_extensions.all()) {
-        for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+        for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) {
             if (available_extensions[i]) {
                 continue;
             }
-            LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
+            LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]);
             is_suitable = false;
         }
     }

     bool has_graphics{}, has_present{};
-    const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
+    const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
     for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
         const auto& family = queue_family_properties[i];
         if (family.queueCount == 0) {
             continue;
         }
-        has_graphics |=
-            (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
-        has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
+        has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT;
+        has_present |= physical.GetSurfaceSupportKHR(i, surface);
     }
     if (!has_graphics || !has_present) {
         LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue");
@@ -328,7 +441,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
     }

     // TODO(Rodrigo): Check if the device matches all requeriments.
-    const auto properties{physical.getProperties(dldi)};
+    const auto properties{physical.GetProperties()};
     const auto& limits{properties.limits};

     constexpr u32 required_ubo_size = 65536;
@@ -345,7 +458,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
         is_suitable = false;
     }

-    const auto features{physical.getFeatures(dldi)};
+    const auto features{physical.GetFeatures()};
     const std::array feature_report = {
         std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
         std::make_pair(features.independentBlend, "independentBlend"),
@@ -377,9 +490,9 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
     return is_suitable;
 }

-std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
+std::vector<const char*> VKDevice::LoadExtensions() {
     std::vector<const char*> extensions;
-    const auto Test = [&](const vk::ExtensionProperties& extension,
+    const auto Test = [&](const VkExtensionProperties& extension,
                           std::optional<std::reference_wrapper<bool>> status, const char* name,
                           bool push) {
         if (extension.extensionName != std::string_view(name)) {
@@ -393,22 +506,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
         }
     };

-    extensions.reserve(15);
-    extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
-    extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
-    extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
-    extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME);
-    extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
-    extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
-    extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
-    extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
-
-    [[maybe_unused]] const bool nsight =
-        std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+    extensions.reserve(7 + REQUIRED_EXTENSIONS.size());
+    extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end());
+
     bool has_khr_shader_float16_int8{};
     bool has_ext_subgroup_size_control{};
     bool has_ext_transform_feedback{};
-    for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
+    for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) {
         Test(extension, khr_uniform_buffer_standard_layout,
              VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
         Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
@@ -428,38 +532,67 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
         }
     }

+    VkPhysicalDeviceFeatures2KHR features;
+    features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
+
+    VkPhysicalDeviceProperties2KHR properties;
+    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+
     if (has_khr_shader_float16_int8) {
-        is_float16_supported =
-            GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
+        VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features;
+        float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;
+        float16_int8_features.pNext = nullptr;
+        features.pNext = &float16_int8_features;
+
+        physical.GetFeatures2KHR(features);
+        is_float16_supported = float16_int8_features.shaderFloat16;
         extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
     }

     if (has_ext_subgroup_size_control) {
-        const auto features =
-            GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
-        const auto properties =
-            GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi);
-
-        is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize;
-
-        if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize &&
-            properties.maxSubgroupSize >= GuestWarpSize) {
+        VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features;
+        subgroup_features.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT;
+        subgroup_features.pNext = nullptr;
+        features.pNext = &subgroup_features;
+        physical.GetFeatures2KHR(features);
+
+        VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties;
+        subgroup_properties.sType =
+            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT;
+        subgroup_properties.pNext = nullptr;
+        properties.pNext = &subgroup_properties;
+        physical.GetProperties2KHR(properties);
+
+        is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize;
+
+        if (subgroup_features.subgroupSizeControl &&
+            subgroup_properties.minSubgroupSize <= GuestWarpSize &&
+            subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
             extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
-            guest_warp_stages = properties.requiredSubgroupSizeStages;
+            guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
         }
     } else {
         is_warp_potentially_bigger = true;
     }

     if (has_ext_transform_feedback) {
-        const auto features =
-            GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
-        const auto properties =
-            GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
-
-        if (features.transformFeedback && features.geometryStreams &&
-            properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
-            properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
+        VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
+        tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
+        tfb_features.pNext = nullptr;
+        features.pNext = &tfb_features;
+        physical.GetFeatures2KHR(features);
+
+        VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties;
+        tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT;
+        tfb_properties.pNext = nullptr;
+        properties.pNext = &tfb_properties;
+        physical.GetProperties2KHR(properties);
+
+        if (tfb_features.transformFeedback && tfb_features.geometryStreams &&
+            tfb_properties.maxTransformFeedbackStreams >= 4 &&
+            tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries &&
+            tfb_properties.transformFeedbackDraw) {
             extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
             ext_transform_feedback = true;
         }
@@ -468,10 +601,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
     return extensions;
 }

-void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
+void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
     std::optional<u32> graphics_family_, present_family_;

-    const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
+    const std::vector queue_family_properties = physical.GetQueueFamilyProperties();
     for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
         if (graphics_family_ && present_family_)
             break;
@@ -480,10 +613,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
         if (queue_family.queueCount == 0)
             continue;

-        if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics)
+        if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) {
             graphics_family_ = i;
-        if (physical.getSurfaceSupportKHR(i, surface, dldi))
+        }
+        if (physical.GetSurfaceSupportKHR(i, surface)) {
             present_family_ = i;
+        }
     }
     ASSERT(graphics_family_ && present_family_);

@@ -491,111 +626,49 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
     present_family = *present_family_;
 }

-void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
-    const auto supported_features{physical.getFeatures(dldi)};
+void VKDevice::SetupFeatures() {
+    const auto supported_features{physical.GetFeatures()};
     is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
-    is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
+    is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
 }

 void VKDevice::CollectTelemetryParameters() {
-    const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld);
+    VkPhysicalDeviceDriverPropertiesKHR driver;
+    driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR;
+    driver.pNext = nullptr;
+
+    VkPhysicalDeviceProperties2KHR properties;
+    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+    properties.pNext = &driver;
+    physical.GetProperties2KHR(properties);
+
     driver_id = driver.driverID;
     vendor_name = driver.driverName;

-    const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld);
+    const std::vector extensions = physical.EnumerateDeviceExtensionProperties();
     reported_extensions.reserve(std::size(extensions));
     for (const auto& extension : extensions) {
         reported_extensions.push_back(extension.extensionName);
     }
 }

-std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
-    static const float QUEUE_PRIORITY = 1.0f;
+std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
+    static constexpr float QUEUE_PRIORITY = 1.0f;

-    std::set<u32> unique_queue_families = {graphics_family, present_family};
-    std::vector<vk::DeviceQueueCreateInfo> queue_cis;
+    std::unordered_set<u32> unique_queue_families = {graphics_family, present_family};
+    std::vector<VkDeviceQueueCreateInfo> queue_cis;

-    for (u32 queue_family : unique_queue_families)
-        queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY});
+    for (const u32 queue_family : unique_queue_families) {
+        VkDeviceQueueCreateInfo& ci = queue_cis.emplace_back();
+        ci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
+        ci.pNext = nullptr;
+        ci.flags = 0;
+        ci.queueFamilyIndex = queue_family;
+        ci.queueCount = 1;
+        ci.pQueuePriorities = &QUEUE_PRIORITY;
+    }

     return queue_cis;
 }

-std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
-    const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
-    static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
-                                        vk::Format::eA8B8G8R8UintPack32,
-                                        vk::Format::eA8B8G8R8SnormPack32,
-                                        vk::Format::eA8B8G8R8SrgbPack32,
-                                        vk::Format::eB5G6R5UnormPack16,
-                                        vk::Format::eA2B10G10R10UnormPack32,
-                                        vk::Format::eA1R5G5B5UnormPack16,
-                                        vk::Format::eR32G32B32A32Sfloat,
-                                        vk::Format::eR32G32B32A32Uint,
-                                        vk::Format::eR32G32Sfloat,
-                                        vk::Format::eR32G32Uint,
-                                        vk::Format::eR16G16B16A16Uint,
-                                        vk::Format::eR16G16B16A16Snorm,
-                                        vk::Format::eR16G16B16A16Unorm,
-                                        vk::Format::eR16G16Unorm,
-                                        vk::Format::eR16G16Snorm,
-                                        vk::Format::eR16G16Sfloat,
-                                        vk::Format::eR16Unorm,
-                                        vk::Format::eR8G8B8A8Srgb,
-                                        vk::Format::eR8G8Unorm,
-                                        vk::Format::eR8G8Snorm,
-                                        vk::Format::eR8Unorm,
-                                        vk::Format::eR8Uint,
-                                        vk::Format::eB10G11R11UfloatPack32,
-                                        vk::Format::eR32Sfloat,
-                                        vk::Format::eR32Uint,
-                                        vk::Format::eR32Sint,
-                                        vk::Format::eR16Sfloat,
-                                        vk::Format::eR16G16B16A16Sfloat,
-                                        vk::Format::eB8G8R8A8Unorm,
-                                        vk::Format::eR4G4B4A4UnormPack16,
-                                        vk::Format::eD32Sfloat,
-                                        vk::Format::eD16Unorm,
-                                        vk::Format::eD16UnormS8Uint,
-                                        vk::Format::eD24UnormS8Uint,
-                                        vk::Format::eD32SfloatS8Uint,
-                                        vk::Format::eBc1RgbaUnormBlock,
-                                        vk::Format::eBc2UnormBlock,
-                                        vk::Format::eBc3UnormBlock,
-                                        vk::Format::eBc4UnormBlock,
-                                        vk::Format::eBc5UnormBlock,
-                                        vk::Format::eBc5SnormBlock,
-                                        vk::Format::eBc7UnormBlock,
-                                        vk::Format::eBc6HUfloatBlock,
-                                        vk::Format::eBc6HSfloatBlock,
-                                        vk::Format::eBc1RgbaSrgbBlock,
-                                        vk::Format::eBc2SrgbBlock,
-                                        vk::Format::eBc3SrgbBlock,
-                                        vk::Format::eBc7SrgbBlock,
-                                        vk::Format::eAstc4x4SrgbBlock,
-                                        vk::Format::eAstc8x8SrgbBlock,
-                                        vk::Format::eAstc8x5SrgbBlock,
-                                        vk::Format::eAstc5x4SrgbBlock,
-                                        vk::Format::eAstc5x5UnormBlock,
-                                        vk::Format::eAstc5x5SrgbBlock,
-                                        vk::Format::eAstc10x8UnormBlock,
-                                        vk::Format::eAstc10x8SrgbBlock,
-                                        vk::Format::eAstc6x6UnormBlock,
-                                        vk::Format::eAstc6x6SrgbBlock,
-                                        vk::Format::eAstc10x10UnormBlock,
-                                        vk::Format::eAstc10x10SrgbBlock,
-                                        vk::Format::eAstc12x12UnormBlock,
-                                        vk::Format::eAstc12x12SrgbBlock,
-                                        vk::Format::eAstc8x6UnormBlock,
-                                        vk::Format::eAstc8x6SrgbBlock,
-                                        vk::Format::eAstc6x5UnormBlock,
-                                        vk::Format::eAstc6x5SrgbBlock,
-                                        vk::Format::eE5B9G9R9UfloatPack32};
-    std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
-    for (const auto format : formats) {
-        format_properties.emplace(format, physical.getFormatProperties(format, dldi));
-    }
-    return format_properties;
-}
-
 } // namespace Vulkan
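
All of the extension queries above follow the same VK_KHR_get_physical_device_properties2 pattern: set sType on a VkPhysicalDeviceFeatures2KHR (or Properties2KHR) container, chain each extension struct through pNext, and let the driver fill the whole chain in one call. Below is a minimal sketch of that pattern against the raw C API, assuming the function pointer was loaded from an instance that enabled VK_KHR_get_physical_device_properties2 (the wrapper's GetFeatures2KHR presumably forwards to this entry point):

    #include <vulkan/vulkan.h>

    // Sketch only: query one extension feature struct through a pNext chain.
    bool QueryShaderFloat16(VkPhysicalDevice physical,
                            PFN_vkGetPhysicalDeviceFeatures2KHR get_features2) {
        VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8{};
        float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR;

        VkPhysicalDeviceFeatures2KHR features2{};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
        features2.pNext = &float16_int8; // the driver writes back every struct in the chain

        get_features2(physical, &features2);
        return float16_int8.shaderFloat16 == VK_TRUE;
    }

Several structs can be linked onto one pNext list per call; each only needs its sType set and its pNext chained before the query, which is exactly how LoadExtensions() reuses the same features/properties containers above.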
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 6e656517f..60d64572a 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -8,8 +8,9 @@
 #include <string_view>
 #include <unordered_map>
 #include <vector>
+
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/wrapper.h"

 namespace Vulkan {

@@ -22,12 +23,12 @@ const u32 GuestWarpSize = 32;
 /// Handles data specific to a physical device.
 class VKDevice final {
 public:
-    explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
-                      vk::SurfaceKHR surface);
+    explicit VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface,
+                      const vk::InstanceDispatch& dld);
     ~VKDevice();

     /// Initializes the device. Returns true on success.
-    bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance);
+    bool Create();

     /**
      * Returns a format supported by the device for the passed requeriments.
@@ -36,20 +37,20 @@ public:
      * @param format_type Format type usage.
      * @returns A format supported by the device.
      */
-    vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
-                                  FormatType format_type) const;
+    VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
+                                FormatType format_type) const;

     /// Reports a device loss.
     void ReportLoss() const;

     /// Returns the dispatch loader with direct function pointers of the device.
-    const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
+    const vk::DeviceDispatch& GetDispatchLoader() const {
         return dld;
     }

     /// Returns the logical device.
-    vk::Device GetLogical() const {
-        return logical.get();
+    const vk::Device& GetLogical() const {
+        return logical;
     }

     /// Returns the physical device.
@@ -79,7 +80,7 @@ public:

     /// Returns true if the device is integrated with the host CPU.
     bool IsIntegrated() const {
-        return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
+        return properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
     }

     /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers.
@@ -98,27 +99,27 @@ public:
     }

     /// Returns the driver ID.
-    vk::DriverIdKHR GetDriverID() const {
+    VkDriverIdKHR GetDriverID() const {
         return driver_id;
     }

     /// Returns uniform buffer alignment requeriment.
-    vk::DeviceSize GetUniformBufferAlignment() const {
+    VkDeviceSize GetUniformBufferAlignment() const {
         return properties.limits.minUniformBufferOffsetAlignment;
     }

     /// Returns storage alignment requeriment.
-    vk::DeviceSize GetStorageBufferAlignment() const {
+    VkDeviceSize GetStorageBufferAlignment() const {
         return properties.limits.minStorageBufferOffsetAlignment;
     }

     /// Returns the maximum range for storage buffers.
-    vk::DeviceSize GetMaxStorageBufferRange() const {
+    VkDeviceSize GetMaxStorageBufferRange() const {
         return properties.limits.maxStorageBufferRange;
     }

     /// Returns the maximum size for push constants.
-    vk::DeviceSize GetMaxPushConstantsSize() const {
+    VkDeviceSize GetMaxPushConstantsSize() const {
         return properties.limits.maxPushConstantsSize;
     }

@@ -138,8 +139,8 @@ public:
     }

     /// Returns true if the device can be forced to use the guest warp size.
-    bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const {
-        return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
+    bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const {
+        return guest_warp_stages & stage;
     }

     /// Returns true if formatless image load is supported.
@@ -188,50 +189,44 @@ public:
     }

     /// Checks if the physical device is suitable.
-    static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
-                           vk::SurfaceKHR surface);
+    static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);

 private:
     /// Loads extensions into a vector and stores available ones in this object.
-    std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi);
+    std::vector<const char*> LoadExtensions();

     /// Sets up queue families.
-    void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
+    void SetupFamilies(VkSurfaceKHR surface);

     /// Sets up device features.
-    void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
+    void SetupFeatures();

     /// Collects telemetry information from the device.
     void CollectTelemetryParameters();

     /// Returns a list of queue initialization descriptors.
-    std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
+    std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;

     /// Returns true if ASTC textures are natively supported.
-    bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
-                                const vk::DispatchLoaderDynamic& dldi) const;
+    bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;

     /// Returns true if a format is supported.
-    bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
+    bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
                            FormatType format_type) const;

-    /// Returns the device properties for Vulkan formats.
-    static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
-        const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
-
-    const vk::PhysicalDevice physical;         ///< Physical device.
-    vk::DispatchLoaderDynamic dld;             ///< Device function pointers.
-    vk::PhysicalDeviceProperties properties;   ///< Device properties.
-    UniqueDevice logical;                      ///< Logical device.
-    vk::Queue graphics_queue;                  ///< Main graphics queue.
-    vk::Queue present_queue;                   ///< Main present queue.
-    u32 graphics_family{};                     ///< Main graphics queue family index.
-    u32 present_family{};                      ///< Main present queue family index.
-    vk::DriverIdKHR driver_id{};               ///< Driver ID.
-    vk::ShaderStageFlags guest_warp_stages{};  ///< Stages where the guest warp size can be forced.
-    bool is_optimal_astc_supported{};          ///< Support for native ASTC.
-    bool is_float16_supported{};               ///< Support for float16 arithmetics.
-    bool is_warp_potentially_bigger{};         ///< Host warp size can be bigger than guest.
+    vk::DeviceDispatch dld;                    ///< Device function pointers.
+    vk::PhysicalDevice physical;               ///< Physical device.
+    VkPhysicalDeviceProperties properties;     ///< Device properties.
+    vk::Device logical;                        ///< Logical device.
+    vk::Queue graphics_queue;                  ///< Main graphics queue.
+    vk::Queue present_queue;                   ///< Main present queue.
+    u32 graphics_family{};                     ///< Main graphics queue family index.
+    u32 present_family{};                      ///< Main present queue family index.
+    VkDriverIdKHR driver_id{};                 ///< Driver ID.
+    VkShaderStageFlags guest_warp_stages{};    ///< Stages where the guest warp size can be forced.
+    bool is_optimal_astc_supported{};          ///< Support for native ASTC.
+    bool is_float16_supported{};               ///< Support for float16 arithmetics.
+    bool is_warp_potentially_bigger{};         ///< Host warp size can be bigger than guest.
     bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
     bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
     bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8.
@@ -245,7 +240,7 @@ private:
     std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.

     /// Format properties dictionary.
-    std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
+    std::unordered_map<VkFormat, VkFormatProperties> format_properties;
 };

 } // namespace Vulkan
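
The alignment getters above return raw VkDeviceSize limits; code that suballocates uniform or storage ranges out of a shared buffer has to round its offsets up to them. A hypothetical usage sketch (AlignUp is not part of this patch; Vulkan guarantees these alignments are powers of two):

    #include <vulkan/vulkan.h>

    // Round a suballocation offset up to a power-of-two alignment before binding,
    // e.g. AlignUp(cursor, device.GetUniformBufferAlignment()).
    constexpr VkDeviceSize AlignUp(VkDeviceSize value, VkDeviceSize alignment) {
        return (value + alignment - 1) & ~(alignment - 1);
    }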
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 6a02403c1..b540b838d 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -2,11 +2,13 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

+#include <array>
+#include <cstring>
 #include <vector>
+
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/microprofile.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
@@ -16,6 +18,7 @@
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/renderer_vulkan/wrapper.h"

 namespace Vulkan {

@@ -23,21 +26,26 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache);

 namespace {

-vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
-    return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail),
-                              MaxwellToVK::StencilOp(face.action_depth_pass),
-                              MaxwellToVK::StencilOp(face.action_depth_fail),
-                              MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0);
+VkStencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) {
+    VkStencilOpState state;
+    state.failOp = MaxwellToVK::StencilOp(face.action_stencil_fail);
+    state.passOp = MaxwellToVK::StencilOp(face.action_depth_pass);
+    state.depthFailOp = MaxwellToVK::StencilOp(face.action_depth_fail);
+    state.compareOp = MaxwellToVK::ComparisonOp(face.test_func);
+    state.compareMask = 0;
+    state.writeMask = 0;
+    state.reference = 0;
+    return state;
 }

-bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) {
+bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
     static constexpr std::array unsupported_topologies = {
-        vk::PrimitiveTopology::ePointList,
-        vk::PrimitiveTopology::eLineList,
-        vk::PrimitiveTopology::eTriangleList,
-        vk::PrimitiveTopology::eLineListWithAdjacency,
-        vk::PrimitiveTopology::eTriangleListWithAdjacency,
-        vk::PrimitiveTopology::ePatchList};
+        VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
+        VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
+        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+        VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
+        VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
+        VK_PRIMITIVE_TOPOLOGY_PATCH_LIST};
     return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies),
                      topology) == std::end(unsupported_topologies);
 }
@@ -49,7 +57,7 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
                                        VKUpdateDescriptorQueue& update_descriptor_queue,
                                        VKRenderPassCache& renderpass_cache,
                                        const GraphicsPipelineCacheKey& key,
-                                       const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
+                                       vk::Span<VkDescriptorSetLayoutBinding> bindings,
                                        const SPIRVProgram& program)
     : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
       descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
@@ -63,7 +71,7 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche

 VKGraphicsPipeline::~VKGraphicsPipeline() = default;

-vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
+VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
     if (!descriptor_template) {
         return {};
     }
@@ -72,27 +80,32 @@ vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
     return set;
 }

-UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
-    const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const {
-    const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci(
-        {}, static_cast<u32>(bindings.size()), bindings.data());
-
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld);
+vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
+    vk::Span<VkDescriptorSetLayoutBinding> bindings) const {
+    VkDescriptorSetLayoutCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.bindingCount = bindings.size();
+    ci.pBindings = bindings.data();
+    return device.GetLogical().CreateDescriptorSetLayout(ci);
 }

-UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
-    const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0,
-                                                          nullptr);
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld);
+vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
+    VkPipelineLayoutCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.setLayoutCount = 1;
+    ci.pSetLayouts = descriptor_set_layout.address();
+    ci.pushConstantRangeCount = 0;
+    ci.pPushConstantRanges = nullptr;
+    return device.GetLogical().CreatePipelineLayout(ci);
 }

-UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
+vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
     const SPIRVProgram& program) const {
-    std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
+    std::vector<VkDescriptorUpdateTemplateEntry> template_entries;
     u32 binding = 0;
     u32 offset = 0;
     for (const auto& stage : program) {
@@ -102,38 +115,47 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat
         }
     }
     if (template_entries.empty()) {
         // If the shader doesn't use descriptor sets, skip template creation.
-        return UniqueDescriptorUpdateTemplate{};
+        return {};
     }

-    const vk::DescriptorUpdateTemplateCreateInfo template_ci(
-        {}, static_cast<u32>(template_entries.size()), template_entries.data(),
-        vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout,
-        vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET);
-
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld);
+    VkDescriptorUpdateTemplateCreateInfoKHR ci;
+    ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size());
+    ci.pDescriptorUpdateEntries = template_entries.data();
+    ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
+    ci.descriptorSetLayout = *descriptor_set_layout;
+    ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
+    ci.pipelineLayout = *layout;
+    ci.set = DESCRIPTOR_SET;
+    return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci);
 }

-std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules(
+std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
     const SPIRVProgram& program) const {
-    std::vector<UniqueShaderModule> modules;
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
+    VkShaderModuleCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+
+    std::vector<vk::ShaderModule> modules;
+    modules.reserve(Maxwell::MaxShaderStage);
     for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
         const auto& stage = program[i];
         if (!stage) {
             continue;
         }
-        const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32),
-                                                   stage->code.data());
-        modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld));
+
+        ci.codeSize = stage->code.size() * sizeof(u32);
+        ci.pCode = stage->code.data();
+        modules.push_back(device.GetLogical().CreateShaderModule(ci));
     }
     return modules;
 }

-UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
-                                                  const SPIRVProgram& program) const {
+vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
+                                                const SPIRVProgram& program) const {
     const auto& vi = fixed_state.vertex_input;
     const auto& ia = fixed_state.input_assembly;
     const auto& ds = fixed_state.depth_stencil;
@@ -141,19 +163,26 @@ UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& render
     const auto& ts = fixed_state.tessellation;
     const auto& rs = fixed_state.rasterizer;

-    std::vector<vk::VertexInputBindingDescription> vertex_bindings;
-    std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
+    std::vector<VkVertexInputBindingDescription> vertex_bindings;
+    std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
     for (std::size_t i = 0; i < vi.num_bindings; ++i) {
         const auto& binding = vi.bindings[i];
         const bool instanced = binding.divisor != 0;
-        const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex;
-        vertex_bindings.emplace_back(binding.index, binding.stride, rate);
+        const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
+
+        auto& vertex_binding = vertex_bindings.emplace_back();
+        vertex_binding.binding = binding.index;
+        vertex_binding.stride = binding.stride;
+        vertex_binding.inputRate = rate;
+
         if (instanced) {
-            vertex_binding_divisors.emplace_back(binding.index, binding.divisor);
+            auto& binding_divisor = vertex_binding_divisors.emplace_back();
+            binding_divisor.binding = binding.index;
+            binding_divisor.divisor = binding.divisor;
         }
     }

-    std::vector<vk::VertexInputAttributeDescription> vertex_attributes;
+    std::vector<VkVertexInputAttributeDescription> vertex_attributes;
     const auto& input_attributes = program[0]->entries.attributes;
     for (std::size_t i = 0; i < vi.num_attributes; ++i) {
         const auto& attribute = vi.attributes[i];
@@ -161,109 +190,194 @@ UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& render
             // Skip attributes not used by the vertex shaders.
             continue;
         }
-        vertex_attributes.emplace_back(attribute.index, attribute.buffer,
-                                       MaxwellToVK::VertexFormat(attribute.type, attribute.size),
-                                       attribute.offset);
+        auto& vertex_attribute = vertex_attributes.emplace_back();
+        vertex_attribute.location = attribute.index;
+        vertex_attribute.binding = attribute.buffer;
+        vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size);
+        vertex_attribute.offset = attribute.offset;
     }

-    vk::PipelineVertexInputStateCreateInfo vertex_input_ci(
-        {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(),
-        static_cast<u32>(vertex_attributes.size()), vertex_attributes.data());
-
-    const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci(
-        static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data());
+    VkPipelineVertexInputStateCreateInfo vertex_input_ci;
+    vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
+    vertex_input_ci.pNext = nullptr;
+    vertex_input_ci.flags = 0;
+    vertex_input_ci.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size());
+    vertex_input_ci.pVertexBindingDescriptions = vertex_bindings.data();
+    vertex_input_ci.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size());
+    vertex_input_ci.pVertexAttributeDescriptions = vertex_attributes.data();
+
+    VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci;
+    input_divisor_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT;
+    input_divisor_ci.pNext = nullptr;
+    input_divisor_ci.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size());
+    input_divisor_ci.pVertexBindingDivisors = vertex_binding_divisors.data();
     if (!vertex_binding_divisors.empty()) {
-        vertex_input_ci.pNext = &vertex_input_divisor_ci;
+        vertex_input_ci.pNext = &input_divisor_ci;
     }

-    const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
-    const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci(
-        {}, primitive_topology,
-        ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology));
-
-    const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points);
-
-    const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr,
-                                                          Maxwell::NumViewports, nullptr);
-
-    // TODO(Rodrigo): Find out what's the default register value for front face
-    const vk::PipelineRasterizationStateCreateInfo rasterizer_ci(
-        {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill,
-        rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone,
-        MaxwellToVK::FrontFace(rs.front_face), rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f);
-
-    const vk::PipelineMultisampleStateCreateInfo multisampling_ci(
-        {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false);
-
-    const vk::CompareOp depth_test_compare = ds.depth_test_enable
-                                                 ? MaxwellToVK::ComparisonOp(ds.depth_test_function)
-                                                 : vk::CompareOp::eAlways;
-
-    const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci(
-        {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable,
-        ds.stencil_enable, GetStencilFaceState(ds.front_stencil),
-        GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f);
-
-    std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
+    VkPipelineInputAssemblyStateCreateInfo input_assembly_ci;
+    input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
+    input_assembly_ci.pNext = nullptr;
+    input_assembly_ci.flags = 0;
+    input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, ia.topology);
+    input_assembly_ci.primitiveRestartEnable =
+        ia.primitive_restart_enable && SupportsPrimitiveRestart(input_assembly_ci.topology);
+
+    VkPipelineTessellationStateCreateInfo tessellation_ci;
+    tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
+    tessellation_ci.pNext = nullptr;
+    tessellation_ci.flags = 0;
+    tessellation_ci.patchControlPoints = ts.patch_control_points;
+
+    VkPipelineViewportStateCreateInfo viewport_ci;
+    viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
+    viewport_ci.pNext = nullptr;
+    viewport_ci.flags = 0;
+    viewport_ci.viewportCount = Maxwell::NumViewports;
+    viewport_ci.pViewports = nullptr;
+    viewport_ci.scissorCount = Maxwell::NumViewports;
+    viewport_ci.pScissors = nullptr;
+
+    VkPipelineRasterizationStateCreateInfo rasterization_ci;
+    rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
+    rasterization_ci.pNext = nullptr;
+    rasterization_ci.flags = 0;
+    rasterization_ci.depthClampEnable = rs.depth_clamp_enable;
+    rasterization_ci.rasterizerDiscardEnable = VK_FALSE;
+    rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL;
+    rasterization_ci.cullMode =
+        rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : VK_CULL_MODE_NONE;
+    rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.front_face);
+    rasterization_ci.depthBiasEnable = rs.depth_bias_enable;
+    rasterization_ci.depthBiasConstantFactor = 0.0f;
+    rasterization_ci.depthBiasClamp = 0.0f;
+    rasterization_ci.depthBiasSlopeFactor = 0.0f;
+    rasterization_ci.lineWidth = 1.0f;
+
+    VkPipelineMultisampleStateCreateInfo multisample_ci;
+    multisample_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
+    multisample_ci.pNext = nullptr;
+    multisample_ci.flags = 0;
+    multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
+    multisample_ci.sampleShadingEnable = VK_FALSE;
+    multisample_ci.minSampleShading = 0.0f;
+    multisample_ci.pSampleMask = nullptr;
+    multisample_ci.alphaToCoverageEnable = VK_FALSE;
+    multisample_ci.alphaToOneEnable = VK_FALSE;
+
+    VkPipelineDepthStencilStateCreateInfo depth_stencil_ci;
+    depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
+    depth_stencil_ci.pNext = nullptr;
+    depth_stencil_ci.flags = 0;
+    depth_stencil_ci.depthTestEnable = ds.depth_test_enable;
+    depth_stencil_ci.depthWriteEnable = ds.depth_write_enable;
+    depth_stencil_ci.depthCompareOp = ds.depth_test_enable
+                                          ? MaxwellToVK::ComparisonOp(ds.depth_test_function)
+                                          : VK_COMPARE_OP_ALWAYS;
+    depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable;
+    depth_stencil_ci.stencilTestEnable = ds.stencil_enable;
+    depth_stencil_ci.front = GetStencilFaceState(ds.front_stencil);
+    depth_stencil_ci.back = GetStencilFaceState(ds.back_stencil);
+    depth_stencil_ci.minDepthBounds = 0.0f;
+    depth_stencil_ci.maxDepthBounds = 0.0f;
+
+    std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
     const std::size_t num_attachments =
         std::min(cd.attachments_count, renderpass_params.color_attachments.size());
     for (std::size_t i = 0; i < num_attachments; ++i) {
-        constexpr std::array component_table{
-            vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG,
-            vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA};
+        static constexpr std::array component_table = {
+            VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT,
+            VK_COLOR_COMPONENT_A_BIT};
         const auto& blend = cd.attachments[i];

-        vk::ColorComponentFlags color_components{};
+        VkColorComponentFlags color_components = 0;
         for (std::size_t j = 0; j < component_table.size(); ++j) {
-            if (blend.components[j])
+            if (blend.components[j]) {
                 color_components |= component_table[j];
+            }
         }

-        cb_attachments[i] = vk::PipelineColorBlendAttachmentState(
-            blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func),
-            MaxwellToVK::BlendFactor(blend.dst_rgb_func),
-            MaxwellToVK::BlendEquation(blend.rgb_equation),
-            MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func),
-            MaxwellToVK::BlendEquation(blend.a_equation), color_components);
+        VkPipelineColorBlendAttachmentState& attachment = cb_attachments[i];
+        attachment.blendEnable = blend.enable;
+        attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.src_rgb_func);
+        attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.dst_rgb_func);
+        attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.rgb_equation);
+        attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.src_a_func);
+        attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.dst_a_func);
+        attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.a_equation);
+        attachment.colorWriteMask = color_components;
     }
-    const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy,
-                                                                  static_cast<u32>(num_attachments),
-                                                                  cb_attachments.data(), {});
-
-    constexpr std::array dynamic_states = {
-        vk::DynamicState::eViewport, vk::DynamicState::eScissor,
-        vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants,
-        vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask,
-        vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference};
-    const vk::PipelineDynamicStateCreateInfo dynamic_state_ci(
-        {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data());
-
-    vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
+
+    VkPipelineColorBlendStateCreateInfo color_blend_ci;
+    color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
+    color_blend_ci.pNext = nullptr;
+    color_blend_ci.flags = 0;
+    color_blend_ci.logicOpEnable = VK_FALSE;
+    color_blend_ci.logicOp = VK_LOGIC_OP_COPY;
+    color_blend_ci.attachmentCount = static_cast<u32>(num_attachments);
+    color_blend_ci.pAttachments = cb_attachments.data();
+    std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants));
+
+    static constexpr std::array dynamic_states = {
+        VK_DYNAMIC_STATE_VIEWPORT,           VK_DYNAMIC_STATE_SCISSOR,
+        VK_DYNAMIC_STATE_DEPTH_BIAS,         VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+        VK_DYNAMIC_STATE_DEPTH_BOUNDS,       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+        VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE};
+
+    VkPipelineDynamicStateCreateInfo dynamic_state_ci;
+    dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
+    dynamic_state_ci.pNext = nullptr;
+    dynamic_state_ci.flags = 0;
+    dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size());
+    dynamic_state_ci.pDynamicStates = dynamic_states.data();
+
+    VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci;
+    subgroup_size_ci.sType =
+        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT;
+    subgroup_size_ci.pNext = nullptr;
     subgroup_size_ci.requiredSubgroupSize = GuestWarpSize;

-    std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
+    std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
     std::size_t module_index = 0;
     for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
         if (!program[stage]) {
             continue;
         }
-        const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage);
-        const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum);
-        auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage,
-                                                    *modules[module_index++], "main", nullptr);
-        if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) {
+        VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
+        stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+        stage_ci.pNext = nullptr;
+        stage_ci.flags = 0;
+        stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage));
+        stage_ci.module = *modules[module_index++];
+        stage_ci.pName = "main";
+        stage_ci.pSpecializationInfo = nullptr;
+
+        if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
             stage_ci.pNext = &subgroup_size_ci;
         }
     }

-    const vk::GraphicsPipelineCreateInfo create_info(
-        {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci,
-        &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci,
-        &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0);
-
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld);
+    VkGraphicsPipelineCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.stageCount = static_cast<u32>(shader_stages.size());
+    ci.pStages = shader_stages.data();
+    ci.pVertexInputState = &vertex_input_ci;
+    ci.pInputAssemblyState = &input_assembly_ci;
+    ci.pTessellationState = &tessellation_ci;
+    ci.pViewportState = &viewport_ci;
+    ci.pRasterizationState = &rasterization_ci;
+    ci.pMultisampleState = &multisample_ci;
+    ci.pDepthStencilState = &depth_stencil_ci;
+    ci.pColorBlendState = &color_blend_ci;
+    ci.pDynamicState = &dynamic_state_ci;
+    ci.layout = *layout;
+    ci.renderPass = renderpass;
+    ci.subpass = 0;
+    ci.basePipelineHandle = nullptr;
+    ci.basePipelineIndex = 0;
+    return device.GetLogical().CreateGraphicsPipeline(ci);
 }

 } // namespace Vulkan
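
Every create info in this file is declared uninitialized and then assigned member by member, so a forgotten assignment would hand indeterminate stack memory to the driver. A defensive alternative, shown here as a sketch rather than what the patch does, is to value-initialize first so untouched members default to 0/VK_FALSE/nullptr:

    #include <vulkan/vulkan.h>

    // Sketch: VkPipelineMultisampleStateCreateInfo via {} zero-initialization.
    // The zeroed defaults happen to equal the values CreatePipeline() assigns
    // explicitly (VK_FALSE, 0.0f, nullptr), so only two members need setting.
    VkPipelineMultisampleStateCreateInfo MakeDefaultMultisampleState() {
        VkPipelineMultisampleStateCreateInfo ci{};
        ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
        ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
        return ci;
    }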
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 4f5e4ea2d..7aba70960 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -11,12 +11,12 @@
 #include <vector>

 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/renderer_vulkan/wrapper.h"

 namespace Vulkan {

@@ -39,52 +39,52 @@ public:
39 VKUpdateDescriptorQueue& update_descriptor_queue, 39 VKUpdateDescriptorQueue& update_descriptor_queue,
40 VKRenderPassCache& renderpass_cache, 40 VKRenderPassCache& renderpass_cache,
41 const GraphicsPipelineCacheKey& key, 41 const GraphicsPipelineCacheKey& key,
42 const std::vector<vk::DescriptorSetLayoutBinding>& bindings, 42 vk::Span<VkDescriptorSetLayoutBinding> bindings,
43 const SPIRVProgram& program); 43 const SPIRVProgram& program);
44 ~VKGraphicsPipeline(); 44 ~VKGraphicsPipeline();
45 45
46 vk::DescriptorSet CommitDescriptorSet(); 46 VkDescriptorSet CommitDescriptorSet();
47 47
48 vk::Pipeline GetHandle() const { 48 VkPipeline GetHandle() const {
49 return *pipeline; 49 return *pipeline;
50 } 50 }
51 51
52 vk::PipelineLayout GetLayout() const { 52 VkPipelineLayout GetLayout() const {
53 return *layout; 53 return *layout;
54 } 54 }
55 55
56 vk::RenderPass GetRenderPass() const { 56 VkRenderPass GetRenderPass() const {
57 return renderpass; 57 return renderpass;
58 } 58 }
59 59
60private: 60private:
61 UniqueDescriptorSetLayout CreateDescriptorSetLayout( 61 vk::DescriptorSetLayout CreateDescriptorSetLayout(
62 const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const; 62 vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
63 63
64 UniquePipelineLayout CreatePipelineLayout() const; 64 vk::PipelineLayout CreatePipelineLayout() const;
65 65
66 UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate( 66 vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
67 const SPIRVProgram& program) const; 67 const SPIRVProgram& program) const;
68 68
69 std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const; 69 std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
70 70
71 UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params, 71 vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params,
72 const SPIRVProgram& program) const; 72 const SPIRVProgram& program) const;
73 73
74 const VKDevice& device; 74 const VKDevice& device;
75 VKScheduler& scheduler; 75 VKScheduler& scheduler;
76 const FixedPipelineState fixed_state; 76 const FixedPipelineState fixed_state;
77 const u64 hash; 77 const u64 hash;
78 78
79 UniqueDescriptorSetLayout descriptor_set_layout; 79 vk::DescriptorSetLayout descriptor_set_layout;
80 DescriptorAllocator descriptor_allocator; 80 DescriptorAllocator descriptor_allocator;
81 VKUpdateDescriptorQueue& update_descriptor_queue; 81 VKUpdateDescriptorQueue& update_descriptor_queue;
82 UniquePipelineLayout layout; 82 vk::PipelineLayout layout;
83 UniqueDescriptorUpdateTemplate descriptor_template; 83 vk::DescriptorUpdateTemplateKHR descriptor_template;
84 std::vector<UniqueShaderModule> modules; 84 std::vector<vk::ShaderModule> modules;
85 85
86 vk::RenderPass renderpass; 86 VkRenderPass renderpass;
87 UniquePipeline pipeline; 87 vk::Pipeline pipeline;
88}; 88};
89 89
90} // namespace Vulkan 90} // namespace Vulkan
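
The header now owns its objects through the project's vk:: handles from wrapper.h rather than Vulkan-Hpp's Unique* types. As an illustration only, an owning handle in that spirit can be as small as the sketch below; the real wrapper also stores a dispatch table and covers every handle type, so treat this as an assumption about shape, not the actual API:

    #include <utility> // std::exchange

    class Pipeline {
    public:
        Pipeline() = default;
        Pipeline(VkDevice owner_, VkPipeline handle_) : owner{owner_}, handle{handle_} {}
        Pipeline(Pipeline&& rhs) noexcept
            : owner{rhs.owner}, handle{std::exchange(rhs.handle, VK_NULL_HANDLE)} {}
        // Move assignment omitted for brevity; copying is implicitly disabled.
        ~Pipeline() {
            if (handle != VK_NULL_HANDLE) {
                vkDestroyPipeline(owner, handle, nullptr);
            }
        }
        // operator* yields the raw handle, which is why GetHandle() returns *pipeline.
        VkPipeline operator*() const {
            return handle;
        }
        explicit operator bool() const {
            return handle != VK_NULL_HANDLE;
        }
    private:
        VkDevice owner = VK_NULL_HANDLE;
        VkPipeline handle = VK_NULL_HANDLE;
    };
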
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp
index 4bcbef959..9bceb3861 100644
--- a/src/video_core/renderer_vulkan/vk_image.cpp
+++ b/src/video_core/renderer_vulkan/vk_image.cpp
@@ -6,22 +6,21 @@
6#include <vector> 6#include <vector>
7 7
8#include "common/assert.h" 8#include "common/assert.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/vk_device.h" 9#include "video_core/renderer_vulkan/vk_device.h"
11#include "video_core/renderer_vulkan/vk_image.h" 10#include "video_core/renderer_vulkan/vk_image.h"
12#include "video_core/renderer_vulkan/vk_scheduler.h" 11#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
16VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, 16VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, const VkImageCreateInfo& image_ci,
17 const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask) 17 VkImageAspectFlags aspect_mask)
18 : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask}, 18 : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask},
19 image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} { 19 image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} {
20 UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0, 20 UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0,
21 "Queue family tracking is not implemented"); 21 "Queue family tracking is not implemented");
22 22
23 const auto dev = device.GetLogical(); 23 image = device.GetLogical().CreateImage(image_ci);
24 image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader());
25 24
26 const u32 num_ranges = image_num_layers * image_num_levels; 25 const u32 num_ranges = image_num_layers * image_num_levels;
27 barriers.resize(num_ranges); 26 barriers.resize(num_ranges);
@@ -31,8 +30,8 @@ VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler,
31VKImage::~VKImage() = default; 30VKImage::~VKImage() = default;
32 31
33void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, 32void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
34 vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, 33 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
35 vk::ImageLayout new_layout) { 34 VkImageLayout new_layout) {
36 if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { 35 if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
37 return; 36 return;
38 } 37 }
@@ -43,9 +42,21 @@ void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num
43 const u32 layer = base_layer + layer_it; 42 const u32 layer = base_layer + layer_it;
44 const u32 level = base_level + level_it; 43 const u32 level = base_level + level_it;
45 auto& state = GetSubrangeState(layer, level); 44 auto& state = GetSubrangeState(layer, level);
46 barriers[cursor] = vk::ImageMemoryBarrier( 45 auto& barrier = barriers[cursor];
47 state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED, 46 barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
48 VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1}); 47 barrier.pNext = nullptr;
48 barrier.srcAccessMask = state.access;
49 barrier.dstAccessMask = new_access;
50 barrier.oldLayout = state.layout;
51 barrier.newLayout = new_layout;
52 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
53 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
54 barrier.image = *image;
55 barrier.subresourceRange.aspectMask = aspect_mask;
56 barrier.subresourceRange.baseMipLevel = level;
57 barrier.subresourceRange.levelCount = 1;
58 barrier.subresourceRange.baseArrayLayer = layer;
59 barrier.subresourceRange.layerCount = 1;
49 state.access = new_access; 60 state.access = new_access;
50 state.layout = new_layout; 61 state.layout = new_layout;
51 } 62 }
@@ -53,16 +64,16 @@ void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num
53 64
54 scheduler.RequestOutsideRenderPassOperationContext(); 65 scheduler.RequestOutsideRenderPassOperationContext();
55 66
56 scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) { 67 scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) {
57 // TODO(Rodrigo): Implement a way to use the latest stage across subresources. 68 // TODO(Rodrigo): Implement a way to use the latest stage across subresources.
58 constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands; 69 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
59 cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr, 70 VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {},
60 static_cast<u32>(cursor), barriers.data(), dld); 71 vk::Span(barriers.data(), cursor));
61 }); 72 });
62} 73}
63 74
64bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, 75bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
65 vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept { 76 VkAccessFlags new_access, VkImageLayout new_layout) noexcept {
66 const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && 77 const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
67 base_level == 0 && num_levels == image_num_levels; 78 base_level == 0 && num_levels == image_num_levels;
68 if (!is_full_range) { 79 if (!is_full_range) {
@@ -91,11 +102,21 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num
91 102
92void VKImage::CreatePresentView() { 103void VKImage::CreatePresentView() {
93 // Image type has to be 2D to be presented. 104 // Image type has to be 2D to be presented.
94 const vk::ImageViewCreateInfo image_view_ci({}, *image, vk::ImageViewType::e2D, format, {}, 105 VkImageViewCreateInfo image_view_ci;
95 {aspect_mask, 0, 1, 0, 1}); 106 image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
96 const auto dev = device.GetLogical(); 107 image_view_ci.pNext = nullptr;
97 const auto& dld = device.GetDispatchLoader(); 108 image_view_ci.flags = 0;
98 present_view = dev.createImageViewUnique(image_view_ci, nullptr, dld); 109 image_view_ci.image = *image;
110 image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
111 image_view_ci.format = format;
112 image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
113 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
114 image_view_ci.subresourceRange.aspectMask = aspect_mask;
115 image_view_ci.subresourceRange.baseMipLevel = 0;
116 image_view_ci.subresourceRange.levelCount = 1;
117 image_view_ci.subresourceRange.baseArrayLayer = 0;
118 image_view_ci.subresourceRange.layerCount = 1;
119 present_view = device.GetLogical().CreateImageView(image_view_ci);
99} 120}
100 121
101VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { 122VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
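
Transition above gathers one VkImageMemoryBarrier per (layer, level) pair and flushes the whole batch with a single barrier command. Stripped of the wrapper, the recorded call is equivalent to this raw-C sketch, with cmdbuf, barriers and cursor as captured in the lambda:

    vkCmdPipelineBarrier(cmdbuf,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, // src stage stub, see TODO
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, // dst stage
                         0,                                  // no dependency flags
                         0, nullptr,                         // no global memory barriers
                         0, nullptr,                         // no buffer barriers
                         static_cast<uint32_t>(cursor), barriers.data());
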
diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h
index b78242512..b4d7229e5 100644
--- a/src/video_core/renderer_vulkan/vk_image.h
+++ b/src/video_core/renderer_vulkan/vk_image.h
@@ -8,7 +8,7 @@
8#include <vector> 8#include <vector>
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h" 11#include "video_core/renderer_vulkan/wrapper.h"
12 12
13namespace Vulkan { 13namespace Vulkan {
14 14
@@ -18,16 +18,16 @@ class VKScheduler;
18class VKImage { 18class VKImage {
19public: 19public:
20 explicit VKImage(const VKDevice& device, VKScheduler& scheduler, 20 explicit VKImage(const VKDevice& device, VKScheduler& scheduler,
21 const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask); 21 const VkImageCreateInfo& image_ci, VkImageAspectFlags aspect_mask);
22 ~VKImage(); 22 ~VKImage();
23 23
24 /// Records an image transition in the passed command buffer and updates the image's state. 24 /// Records an image transition in the passed command buffer and updates the image's state.
25 void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, 25 void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
26 vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, 26 VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
27 vk::ImageLayout new_layout); 27 VkImageLayout new_layout);
28 28
29 /// Returns a view compatible with presentation; the image has to be 2D. 29 /// Returns a view compatible with presentation; the image has to be 2D.
30 vk::ImageView GetPresentView() { 30 VkImageView GetPresentView() {
31 if (!present_view) { 31 if (!present_view) {
32 CreatePresentView(); 32 CreatePresentView();
33 } 33 }
@@ -35,28 +35,28 @@ public:
35 } 35 }
36 36
37 /// Returns the Vulkan image handler. 37 /// Returns the Vulkan image handler.
38 vk::Image GetHandle() const { 38 const vk::Image& GetHandle() const {
39 return *image; 39 return image;
40 } 40 }
41 41
42 /// Returns the Vulkan format for this image. 42 /// Returns the Vulkan format for this image.
43 vk::Format GetFormat() const { 43 VkFormat GetFormat() const {
44 return format; 44 return format;
45 } 45 }
46 46
47 /// Returns the Vulkan aspect mask. 47 /// Returns the Vulkan aspect mask.
48 vk::ImageAspectFlags GetAspectMask() const { 48 VkImageAspectFlags GetAspectMask() const {
49 return aspect_mask; 49 return aspect_mask;
50 } 50 }
51 51
52private: 52private:
53 struct SubrangeState final { 53 struct SubrangeState final {
54 vk::AccessFlags access{}; ///< Current access bits. 54 VkAccessFlags access = 0; ///< Current access bits.
55 vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout. 55 VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout.
56 }; 56 };
57 57
58 bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, 58 bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
59 vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept; 59 VkAccessFlags new_access, VkImageLayout new_layout) noexcept;
60 60
61 /// Creates a presentation view. 61 /// Creates a presentation view.
62 void CreatePresentView(); 62 void CreatePresentView();
@@ -67,16 +67,16 @@ private:
67 const VKDevice& device; ///< Device handler. 67 const VKDevice& device; ///< Device handler.
68 VKScheduler& scheduler; ///< Device scheduler. 68 VKScheduler& scheduler; ///< Device scheduler.
69 69
70 const vk::Format format; ///< Vulkan format. 70 const VkFormat format; ///< Vulkan format.
71 const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask. 71 const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
72 const u32 image_num_layers; ///< Number of layers. 72 const u32 image_num_layers; ///< Number of layers.
73 const u32 image_num_levels; ///< Number of mipmap levels. 73 const u32 image_num_levels; ///< Number of mipmap levels.
74 74
75 UniqueImage image; ///< Image handle. 75 vk::Image image; ///< Image handle.
76 UniqueImageView present_view; ///< Image view compatible with presentation. 76 vk::ImageView present_view; ///< Image view compatible with presentation.
77 77
78 std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers. 78 std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers.
79 std::vector<SubrangeState> subrange_states; ///< Current subrange state. 79 std::vector<SubrangeState> subrange_states; ///< Current subrange state.
80 80
81 bool state_diverged = false; ///< True when subresources mismatch in layout. 81 bool state_diverged = false; ///< True when subresources mismatch in layout.
82}; 82};
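
A hypothetical caller of the interface above, preparing the first subresource for a transfer write (the VKImage instance and its prior state are assumed; the flag values are standard Vulkan):

    image.Transition(/*base_layer=*/0, /*num_layers=*/1, /*base_level=*/0, /*num_levels=*/1,
                     VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
                     VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);

Repeated transitions to the same state are cheap: HasChanged() returns early before any barrier is recorded.
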
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
index 9cc9979d0..6a9e658bf 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp
@@ -11,9 +11,9 @@
11#include "common/assert.h" 11#include "common/assert.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "common/logging/log.h" 13#include "common/logging/log.h"
14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_device.h" 14#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_memory_manager.h" 15#include "video_core/renderer_vulkan/vk_memory_manager.h"
16#include "video_core/renderer_vulkan/wrapper.h"
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
@@ -30,17 +30,11 @@ u64 GetAllocationChunkSize(u64 required_size) {
30class VKMemoryAllocation final { 30class VKMemoryAllocation final {
31public: 31public:
32 explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, 32 explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory,
33 vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) 33 VkMemoryPropertyFlags properties, u64 allocation_size, u32 type)
34 : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, 34 : device{device}, memory{std::move(memory)}, properties{properties},
35 shifted_type{ShiftType(type)} {} 35 allocation_size{allocation_size}, shifted_type{ShiftType(type)} {}
36
37 ~VKMemoryAllocation() {
38 const auto dev = device.GetLogical();
39 const auto& dld = device.GetDispatchLoader();
40 dev.free(memory, nullptr, dld);
41 }
42 36
43 VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { 37 VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) {
44 auto found = TryFindFreeSection(free_iterator, allocation_size, 38 auto found = TryFindFreeSection(free_iterator, allocation_size,
45 static_cast<u64>(commit_size), static_cast<u64>(alignment)); 39 static_cast<u64>(commit_size), static_cast<u64>(alignment));
46 if (!found) { 40 if (!found) {
@@ -73,9 +67,8 @@ public:
73 } 67 }
74 68
75 /// Returns whether this allocation is compatible with the arguments. 69 /// Returns whether this allocation is compatible with the arguments.
76 bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const { 70 bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const {
77 return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) && 71 return (wanted_properties & properties) && (type_mask & shifted_type) != 0;
78 (type_mask & shifted_type) != 0;
79 } 72 }
80 73
81private: 74private:
@@ -111,11 +104,11 @@ private:
111 return std::nullopt; 104 return std::nullopt;
112 } 105 }
113 106
114 const VKDevice& device; ///< Vulkan device. 107 const VKDevice& device; ///< Vulkan device.
115 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. 108 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
116 const vk::MemoryPropertyFlags properties; ///< Vulkan properties. 109 const VkMemoryPropertyFlags properties; ///< Vulkan properties.
117 const u64 allocation_size; ///< Size of this allocation. 110 const u64 allocation_size; ///< Size of this allocation.
118 const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. 111 const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted.
119 112
120 /// Hints where the next free region is likely going to be. 113 /// Hints where the next free region is likely going to be.
121 u64 free_iterator{}; 114 u64 free_iterator{};
@@ -125,22 +118,20 @@ private:
125}; 118};
126 119
127VKMemoryManager::VKMemoryManager(const VKDevice& device) 120VKMemoryManager::VKMemoryManager(const VKDevice& device)
128 : device{device}, properties{device.GetPhysical().getMemoryProperties( 121 : device{device}, properties{device.GetPhysical().GetMemoryProperties()},
129 device.GetDispatchLoader())},
130 is_memory_unified{GetMemoryUnified(properties)} {} 122 is_memory_unified{GetMemoryUnified(properties)} {}
131 123
132VKMemoryManager::~VKMemoryManager() = default; 124VKMemoryManager::~VKMemoryManager() = default;
133 125
134VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, 126VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements,
135 bool host_visible) { 127 bool host_visible) {
136 const u64 chunk_size = GetAllocationChunkSize(requirements.size); 128 const u64 chunk_size = GetAllocationChunkSize(requirements.size);
137 129
138 // When a host visible commit is requested, search for host visible and coherent, otherwise search 130 // When a host visible commit is requested, search for host visible and coherent, otherwise search
139 // for a fast device local type. 131 // for a fast device local type.
140 const vk::MemoryPropertyFlags wanted_properties = 132 const VkMemoryPropertyFlags wanted_properties =
141 host_visible 133 host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
142 ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent 134 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
143 : vk::MemoryPropertyFlagBits::eDeviceLocal;
144 135
145 if (auto commit = TryAllocCommit(requirements, wanted_properties)) { 136 if (auto commit = TryAllocCommit(requirements, wanted_properties)) {
146 return commit; 137 return commit;
@@ -161,23 +152,19 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirement
161 return commit; 152 return commit;
162} 153}
163 154
164VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { 155VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) {
165 const auto dev = device.GetLogical(); 156 auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible);
166 const auto& dld = device.GetDispatchLoader(); 157 buffer.BindMemory(commit->GetMemory(), commit->GetOffset());
167 auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible);
168 dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld);
169 return commit; 158 return commit;
170} 159}
171 160
172VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { 161VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) {
173 const auto dev = device.GetLogical(); 162 auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible);
174 const auto& dld = device.GetDispatchLoader(); 163 image.BindMemory(commit->GetMemory(), commit->GetOffset());
175 auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible);
176 dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld);
177 return commit; 164 return commit;
178} 165}
179 166
180bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, 167bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask,
181 u64 size) { 168 u64 size) {
182 const u32 type = [&] { 169 const u32 type = [&] {
183 for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { 170 for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
@@ -191,24 +178,26 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32
191 return 0U; 178 return 0U;
192 }(); 179 }();
193 180
194 const auto dev = device.GetLogical();
195 const auto& dld = device.GetDispatchLoader();
196
197 // Try to allocate found type. 181 // Try to allocate found type.
198 const vk::MemoryAllocateInfo memory_ai(size, type); 182 VkMemoryAllocateInfo memory_ai;
199 vk::DeviceMemory memory; 183 memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
200 if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); 184 memory_ai.pNext = nullptr;
201 res != vk::Result::eSuccess) { 185 memory_ai.allocationSize = size;
202 LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); 186 memory_ai.memoryTypeIndex = type;
187
188 vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai);
189 if (!memory) {
190 LOG_CRITICAL(Render_Vulkan, "Device allocation failed!");
203 return false; 191 return false;
204 } 192 }
205 allocations.push_back( 193
206 std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); 194 allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory),
195 wanted_properties, size, type));
207 return true; 196 return true;
208} 197}
209 198
210VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, 199VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements,
211 vk::MemoryPropertyFlags wanted_properties) { 200 VkMemoryPropertyFlags wanted_properties) {
212 for (auto& allocation : allocations) { 201 for (auto& allocation : allocations) {
213 if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { 202 if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) {
214 continue; 203 continue;
@@ -220,10 +209,9 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& req
220 return {}; 209 return {};
221} 210}
222 211
223/*static*/ bool VKMemoryManager::GetMemoryUnified( 212bool VKMemoryManager::GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties) {
224 const vk::PhysicalDeviceMemoryProperties& properties) {
225 for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { 213 for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) {
226 if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { 214 if (!(properties.memoryHeaps[heap_index].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)) {
227 // Memory is considered unified when heaps are device local only. 215 // Memory is considered unified when heaps are device local only.
228 return false; 216 return false;
229 } 217 }
@@ -232,23 +220,19 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& req
232} 220}
233 221
234VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, 222VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
235 vk::DeviceMemory memory, u64 begin, u64 end) 223 const vk::DeviceMemory& memory, u64 begin, u64 end)
236 : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} 224 : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {}
237 225
238VKMemoryCommitImpl::~VKMemoryCommitImpl() { 226VKMemoryCommitImpl::~VKMemoryCommitImpl() {
239 allocation->Free(this); 227 allocation->Free(this);
240} 228}
241 229
242MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { 230MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
243 const auto dev = device.GetLogical(); 231 return MemoryMap{this, memory.Map(interval.first + offset_, size)};
244 const auto address = reinterpret_cast<u8*>(
245 dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader()));
246 return MemoryMap{this, address};
247} 232}
248 233
249void VKMemoryCommitImpl::Unmap() const { 234void VKMemoryCommitImpl::Unmap() const {
250 const auto dev = device.GetLogical(); 235 memory.Unmap();
251 dev.unmapMemory(memory, device.GetDispatchLoader());
252} 236}
253 237
254MemoryMap VKMemoryCommitImpl::Map() const { 238MemoryMap VKMemoryCommitImpl::Map() const {
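
AllocMemory above scans properties.memoryTypeCount for a usable type, but the predicate itself falls outside the hunk's context. The standard Vulkan selection it performs looks like this sketch (FindMemoryType is a hypothetical name):

    u32 FindMemoryType(const VkPhysicalDeviceMemoryProperties& props,
                       VkMemoryPropertyFlags wanted_properties, u32 type_mask) {
        for (u32 i = 0; i < props.memoryTypeCount; ++i) {
            const bool allowed = (type_mask & (1U << i)) != 0; // usable for this resource
            const bool matches = (props.memoryTypes[i].propertyFlags &
                                  wanted_properties) == wanted_properties;
            if (allowed && matches) {
                return i;
            }
        }
        return 0U; // fallback, mirroring the `return 0U` visible in the diff
    }
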
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h
index cd00bb91b..35ee54d30 100644
--- a/src/video_core/renderer_vulkan/vk_memory_manager.h
+++ b/src/video_core/renderer_vulkan/vk_memory_manager.h
@@ -8,7 +8,7 @@
8#include <utility> 8#include <utility>
9#include <vector> 9#include <vector>
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h" 11#include "video_core/renderer_vulkan/wrapper.h"
12 12
13namespace Vulkan { 13namespace Vulkan {
14 14
@@ -32,13 +32,13 @@ public:
32 * memory. When passing false, it will try to allocate device local memory. 32 * memory. When passing false, it will try to allocate device local memory.
33 * @returns A memory commit. 33 * @returns A memory commit.
34 */ 34 */
35 VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); 35 VKMemoryCommit Commit(const VkMemoryRequirements& reqs, bool host_visible);
36 36
37 /// Commits memory required by the buffer and binds it. 37 /// Commits memory required by the buffer and binds it.
38 VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible); 38 VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible);
39 39
40 /// Commits memory required by the image and binds it. 40 /// Commits memory required by the image and binds it.
41 VKMemoryCommit Commit(vk::Image image, bool host_visible); 41 VKMemoryCommit Commit(const vk::Image& image, bool host_visible);
42 42
43 /// Returns true if the memory allocations are always done in host visible and coherent memory. 43 /// Returns true if the memory allocations are always done in host visible and coherent memory.
44 bool IsMemoryUnified() const { 44 bool IsMemoryUnified() const {
@@ -47,18 +47,18 @@ public:
47 47
48private: 48private:
49 /// Allocates a chunk of memory. 49 /// Allocates a chunk of memory.
50 bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); 50 bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size);
51 51
52 /// Tries to allocate a memory commit. 52 /// Tries to allocate a memory commit.
53 VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, 53 VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements,
54 vk::MemoryPropertyFlags wanted_properties); 54 VkMemoryPropertyFlags wanted_properties);
55 55
56 /// Returns true if the device uses a unified memory model. 56 /// Returns true if the device uses a unified memory model.
57 static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); 57 static bool GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties);
58 58
59 const VKDevice& device; ///< Device handler. 59 const VKDevice& device; ///< Device handler.
60 const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. 60 const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
61 const bool is_memory_unified; ///< True if memory model is unified. 61 const bool is_memory_unified; ///< True if memory model is unified.
62 std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. 62 std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations.
63}; 63};
64 64
@@ -68,7 +68,7 @@ class VKMemoryCommitImpl final {
68 68
69public: 69public:
70 explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, 70 explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation,
71 vk::DeviceMemory memory, u64 begin, u64 end); 71 const vk::DeviceMemory& memory, u64 begin, u64 end);
72 ~VKMemoryCommitImpl(); 72 ~VKMemoryCommitImpl();
73 73
74 /// Maps a memory region and returns a pointer to it. 74 /// Maps a memory region and returns a pointer to it.
@@ -80,13 +80,13 @@ public:
80 MemoryMap Map() const; 80 MemoryMap Map() const;
81 81
82 /// Returns the Vulkan memory handler. 82 /// Returns the Vulkan memory handler.
83 vk::DeviceMemory GetMemory() const { 83 VkDeviceMemory GetMemory() const {
84 return memory; 84 return *memory;
85 } 85 }
86 86
87 /// Returns the start position of the commit relative to the allocation. 87 /// Returns the start position of the commit relative to the allocation.
88 vk::DeviceSize GetOffset() const { 88 VkDeviceSize GetOffset() const {
89 return static_cast<vk::DeviceSize>(interval.first); 89 return static_cast<VkDeviceSize>(interval.first);
90 } 90 }
91 91
92private: 92private:
@@ -94,8 +94,8 @@ private:
94 void Unmap() const; 94 void Unmap() const;
95 95
96 const VKDevice& device; ///< Vulkan device. 96 const VKDevice& device; ///< Vulkan device.
97 const vk::DeviceMemory& memory; ///< Vulkan device memory handler.
97 std::pair<u64, u64> interval{}; ///< Interval where the commit exists. 98 std::pair<u64, u64> interval{}; ///< Interval where the commit exists.
98 vk::DeviceMemory memory; ///< Vulkan device memory handler.
99 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. 99 VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
100}; 100};
101 101
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 557b9d662..90e3a8edd 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -13,7 +13,6 @@
13#include "video_core/engines/kepler_compute.h" 13#include "video_core/engines/kepler_compute.h"
14#include "video_core/engines/maxwell_3d.h" 14#include "video_core/engines/maxwell_3d.h"
15#include "video_core/memory_manager.h" 15#include "video_core/memory_manager.h"
16#include "video_core/renderer_vulkan/declarations.h"
17#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 16#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
18#include "video_core/renderer_vulkan/maxwell_to_vk.h" 17#include "video_core/renderer_vulkan/maxwell_to_vk.h"
19#include "video_core/renderer_vulkan/vk_compute_pipeline.h" 18#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
@@ -26,6 +25,7 @@
26#include "video_core/renderer_vulkan/vk_resource_manager.h" 25#include "video_core/renderer_vulkan/vk_resource_manager.h"
27#include "video_core/renderer_vulkan/vk_scheduler.h" 26#include "video_core/renderer_vulkan/vk_scheduler.h"
28#include "video_core/renderer_vulkan/vk_update_descriptor.h" 27#include "video_core/renderer_vulkan/vk_update_descriptor.h"
28#include "video_core/renderer_vulkan/wrapper.h"
29#include "video_core/shader/compiler_settings.h" 29#include "video_core/shader/compiler_settings.h"
30 30
31namespace Vulkan { 31namespace Vulkan {
@@ -36,12 +36,11 @@ using Tegra::Engines::ShaderType;
36 36
37namespace { 37namespace {
38 38
39// C++20's using enum 39constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
40constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer; 40constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
41constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer; 41constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
42constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer; 42constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
43constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler; 43constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
44constexpr auto eStorageImage = vk::DescriptorType::eStorageImage;
45 44
46constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ 45constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
47 VideoCommon::Shader::CompileDepth::FullDecompile}; 46 VideoCommon::Shader::CompileDepth::FullDecompile};
@@ -126,43 +125,48 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
126 } 125 }
127} 126}
128 127
129template <vk::DescriptorType descriptor_type, class Container> 128template <VkDescriptorType descriptor_type, class Container>
130void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding, 129void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding,
131 vk::ShaderStageFlags stage_flags, const Container& container) { 130 VkShaderStageFlags stage_flags, const Container& container) {
132 const u32 num_entries = static_cast<u32>(std::size(container)); 131 const u32 num_entries = static_cast<u32>(std::size(container));
133 for (std::size_t i = 0; i < num_entries; ++i) { 132 for (std::size_t i = 0; i < num_entries; ++i) {
134 u32 count = 1; 133 u32 count = 1;
135 if constexpr (descriptor_type == eCombinedImageSampler) { 134 if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
136 // Combined image samplers can be arrayed. 135 // Combined image samplers can be arrayed.
137 count = container[i].Size(); 136 count = container[i].Size();
138 } 137 }
139 bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr); 138 VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
139 entry.binding = binding++;
140 entry.descriptorType = descriptor_type;
141 entry.descriptorCount = count;
142 entry.stageFlags = stage_flags;
143 entry.pImmutableSamplers = nullptr;
140 } 144 }
141} 145}
142 146
143u32 FillDescriptorLayout(const ShaderEntries& entries, 147u32 FillDescriptorLayout(const ShaderEntries& entries,
144 std::vector<vk::DescriptorSetLayoutBinding>& bindings, 148 std::vector<VkDescriptorSetLayoutBinding>& bindings,
145 Maxwell::ShaderProgram program_type, u32 base_binding) { 149 Maxwell::ShaderProgram program_type, u32 base_binding) {
146 const ShaderType stage = GetStageFromProgram(program_type); 150 const ShaderType stage = GetStageFromProgram(program_type);
147 const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); 151 const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
148 152
149 u32 binding = base_binding; 153 u32 binding = base_binding;
150 AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers); 154 AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers);
151 AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers); 155 AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers);
152 AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers); 156 AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers);
153 AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers); 157 AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers);
154 AddBindings<eStorageImage>(bindings, binding, flags, entries.images); 158 AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images);
155 return binding; 159 return binding;
156} 160}
157 161
158} // Anonymous namespace 162} // Anonymous namespace
159 163
160CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, 164CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
161 GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, 165 GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
162 ProgramCode program_code, u32 main_offset) 166 u32 main_offset)
163 : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, 167 : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
164 program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, 168 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
165 shader_ir{this->program_code, main_offset, compiler_settings, registry}, 169 compiler_settings, registry},
166 entries{GenerateShaderEntries(shader_ir)} {} 170 entries{GenerateShaderEntries(shader_ir)} {}
167 171
168CachedShader::~CachedShader() = default; 172CachedShader::~CachedShader() = default;
@@ -201,19 +205,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
201 205
202 auto& memory_manager{system.GPU().MemoryManager()}; 206 auto& memory_manager{system.GPU().MemoryManager()};
203 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 207 const GPUVAddr program_addr{GetShaderAddress(system, program)};
204 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 208 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
205 auto shader = TryGet(host_ptr); 209 ASSERT(cpu_addr);
210 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
206 if (!shader) { 211 if (!shader) {
212 const auto host_ptr{memory_manager.GetPointer(program_addr)};
213
207 // No shader found - create a new one 214 // No shader found - create a new one
208 constexpr u32 stage_offset = 10; 215 constexpr u32 stage_offset = 10;
209 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); 216 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
210 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); 217 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
211 218
212 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
213 ASSERT(cpu_addr);
214
215 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, 219 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
216 host_ptr, std::move(code), stage_offset); 220 std::move(code), stage_offset);
217 Register(shader); 221 Register(shader);
218 } 222 }
219 shaders[index] = std::move(shader); 223 shaders[index] = std::move(shader);
@@ -253,18 +257,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
253 257
254 auto& memory_manager = system.GPU().MemoryManager(); 258 auto& memory_manager = system.GPU().MemoryManager();
255 const auto program_addr = key.shader; 259 const auto program_addr = key.shader;
256 const auto host_ptr = memory_manager.GetPointer(program_addr);
257 260
258 auto shader = TryGet(host_ptr); 261 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
262 ASSERT(cpu_addr);
263
264 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
259 if (!shader) { 265 if (!shader) {
260 // No shader found - create a new one 266 // No shader found - create a new one
261 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 267 const auto host_ptr = memory_manager.GetPointer(program_addr);
262 ASSERT(cpu_addr);
263 268
264 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); 269 auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
265 constexpr u32 kernel_main_offset = 0; 270 constexpr u32 kernel_main_offset = 0;
266 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, 271 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
267 program_addr, *cpu_addr, host_ptr, std::move(code), 272 program_addr, *cpu_addr, std::move(code),
268 kernel_main_offset); 273 kernel_main_offset);
269 Register(shader); 274 Register(shader);
270 } 275 }
@@ -317,7 +322,7 @@ void VKPipelineCache::Unregister(const Shader& shader) {
317 RasterizerCache::Unregister(shader); 322 RasterizerCache::Unregister(shader);
318} 323}
319 324
320std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> 325std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
321VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { 326VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
322 const auto& fixed_state = key.fixed_state; 327 const auto& fixed_state = key.fixed_state;
323 auto& memory_manager = system.GPU().MemoryManager(); 328 auto& memory_manager = system.GPU().MemoryManager();
@@ -334,7 +339,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
334 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 339 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
335 340
336 SPIRVProgram program; 341 SPIRVProgram program;
337 std::vector<vk::DescriptorSetLayoutBinding> bindings; 342 std::vector<VkDescriptorSetLayoutBinding> bindings;
338 343
339 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 344 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
340 const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); 345 const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
@@ -345,8 +350,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
345 } 350 }
346 351
347 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); 352 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
348 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 353 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
349 const auto shader = TryGet(host_ptr); 354 ASSERT(cpu_addr);
355 const auto shader = TryGet(*cpu_addr);
350 ASSERT(shader); 356 ASSERT(shader);
351 357
352 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 358 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
@@ -369,32 +375,49 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
369 return {std::move(program), std::move(bindings)}; 375 return {std::move(program), std::move(bindings)};
370} 376}
371 377
372template <vk::DescriptorType descriptor_type, class Container> 378template <VkDescriptorType descriptor_type, class Container>
373void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding, 379void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding,
374 u32& offset, const Container& container) { 380 u32& offset, const Container& container) {
375 static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); 381 static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
376 const u32 count = static_cast<u32>(std::size(container)); 382 const u32 count = static_cast<u32>(std::size(container));
377 383
378 if constexpr (descriptor_type == eCombinedImageSampler) { 384 if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
379 for (u32 i = 0; i < count; ++i) { 385 for (u32 i = 0; i < count; ++i) {
380 const u32 num_samplers = container[i].Size(); 386 const u32 num_samplers = container[i].Size();
381 template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset, 387 VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
382 entry_size); 388 entry.dstBinding = binding;
389 entry.dstArrayElement = 0;
390 entry.descriptorCount = num_samplers;
391 entry.descriptorType = descriptor_type;
392 entry.offset = offset;
393 entry.stride = entry_size;
394
383 ++binding; 395 ++binding;
384 offset += num_samplers * entry_size; 396 offset += num_samplers * entry_size;
385 } 397 }
386 return; 398 return;
387 } 399 }
388 400
389 if constexpr (descriptor_type == eUniformTexelBuffer) { 401 if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) {
390 // Nvidia has a bug where updating multiple uniform texel buffers at once causes the driver to 402 // Nvidia has a bug where updating multiple uniform texel buffers at once causes the driver to
391 // crash. 403 // crash.
392 for (u32 i = 0; i < count; ++i) { 404 for (u32 i = 0; i < count; ++i) {
393 template_entries.emplace_back(binding + i, 0, 1, descriptor_type, 405 VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
394 offset + i * entry_size, entry_size); 406 entry.dstBinding = binding + i;
407 entry.dstArrayElement = 0;
408 entry.descriptorCount = 1;
409 entry.descriptorType = descriptor_type;
410 entry.offset = offset + i * entry_size;
411 entry.stride = entry_size;
395 } 412 }
396 } else if (count > 0) { 413 } else if (count > 0) {
397 template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); 414 VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back();
415 entry.dstBinding = binding;
416 entry.dstArrayElement = 0;
417 entry.descriptorCount = count;
418 entry.descriptorType = descriptor_type;
419 entry.offset = offset;
420 entry.stride = entry_size;
398 } 421 }
399 offset += count * entry_size; 422 offset += count * entry_size;
400 binding += count; 423 binding += count;
@@ -402,12 +425,12 @@ void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries,
402 425
403void FillDescriptorUpdateTemplateEntries( 426void FillDescriptorUpdateTemplateEntries(
404 const ShaderEntries& entries, u32& binding, u32& offset, 427 const ShaderEntries& entries, u32& binding, u32& offset,
405 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { 428 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
406 AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers); 429 AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers);
407 AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers); 430 AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers);
408 AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers); 431 AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers);
409 AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers); 432 AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers);
410 AddEntry<eStorageImage>(template_entries, offset, binding, entries.images); 433 AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images);
411} 434}
412 435
413} // namespace Vulkan 436} // namespace Vulkan
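
The AddEntry rewrite spells out how descriptor update templates pack their payload: each VkDescriptorUpdateTemplateEntry records a byte offset and stride into a caller-owned blob, so a single template update writes many bindings at once. Reduced to one uniform-buffer entry, the pattern is as follows, with binding and offset as maintained by the surrounding loop and DescriptorUpdateEntry standing in for yuzu's payload union, defined elsewhere:

    VkDescriptorUpdateTemplateEntry entry{};
    entry.dstBinding = binding;                   // which binding to write
    entry.dstArrayElement = 0;
    entry.descriptorCount = 1;
    entry.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    entry.offset = offset;                        // byte offset of the payload in the blob
    entry.stride = sizeof(DescriptorUpdateEntry); // distance between consecutive payloads

The per-element loop for uniform texel buffers exists only to sidestep the Nvidia driver crash noted in the comment above it.
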
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c4c112290..7ccdb7083 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -19,12 +19,12 @@
19#include "video_core/engines/const_buffer_engine_interface.h" 19#include "video_core/engines/const_buffer_engine_interface.h"
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/rasterizer_cache.h" 21#include "video_core/rasterizer_cache.h"
22#include "video_core/renderer_vulkan/declarations.h"
23#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 22#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
24#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 23#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
25#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 24#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
26#include "video_core/renderer_vulkan/vk_resource_manager.h" 25#include "video_core/renderer_vulkan/vk_resource_manager.h"
27#include "video_core/renderer_vulkan/vk_shader_decompiler.h" 26#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
27#include "video_core/renderer_vulkan/wrapper.h"
28#include "video_core/shader/registry.h" 28#include "video_core/shader/registry.h"
29#include "video_core/shader/shader_ir.h" 29#include "video_core/shader/shader_ir.h"
30#include "video_core/surface.h" 30#include "video_core/surface.h"
@@ -113,17 +113,13 @@ namespace Vulkan {
113class CachedShader final : public RasterizerCacheObject { 113class CachedShader final : public RasterizerCacheObject {
114public: 114public:
115 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, 115 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
116 VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); 116 VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
117 ~CachedShader(); 117 ~CachedShader();
118 118
119 GPUVAddr GetGpuAddr() const { 119 GPUVAddr GetGpuAddr() const {
120 return gpu_addr; 120 return gpu_addr;
121 } 121 }
122 122
123 VAddr GetCpuAddr() const override {
124 return cpu_addr;
125 }
126
127 std::size_t GetSizeInBytes() const override { 123 std::size_t GetSizeInBytes() const override {
128 return program_code.size() * sizeof(u64); 124 return program_code.size() * sizeof(u64);
129 } 125 }
@@ -149,7 +145,6 @@ private:
149 Tegra::Engines::ShaderType stage); 145 Tegra::Engines::ShaderType stage);
150 146
151 GPUVAddr gpu_addr{}; 147 GPUVAddr gpu_addr{};
152 VAddr cpu_addr{};
153 ProgramCode program_code; 148 ProgramCode program_code;
154 VideoCommon::Shader::Registry registry; 149 VideoCommon::Shader::Registry registry;
155 VideoCommon::Shader::ShaderIR shader_ir; 150 VideoCommon::Shader::ShaderIR shader_ir;
@@ -177,7 +172,7 @@ protected:
177 void FlushObjectInner(const Shader& object) override {} 172 void FlushObjectInner(const Shader& object) override {}
178 173
179private: 174private:
180 std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders( 175 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
181 const GraphicsPipelineCacheKey& key); 176 const GraphicsPipelineCacheKey& key);
182 177
183 Core::System& system; 178 Core::System& system;
@@ -199,6 +194,6 @@ private:
199 194
200void FillDescriptorUpdateTemplateEntries( 195void FillDescriptorUpdateTemplateEntries(
201 const ShaderEntries& entries, u32& binding, u32& offset, 196 const ShaderEntries& entries, u32& binding, u32& offset,
202 std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); 197 std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries);
203 198
204} // namespace Vulkan 199} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index ffbf60dda..0966c7ff7 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -8,19 +8,19 @@
8#include <utility> 8#include <utility>
9#include <vector> 9#include <vector>
10 10
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h" 11#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_query_cache.h" 12#include "video_core/renderer_vulkan/vk_query_cache.h"
14#include "video_core/renderer_vulkan/vk_resource_manager.h" 13#include "video_core/renderer_vulkan/vk_resource_manager.h"
15#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
15#include "video_core/renderer_vulkan/wrapper.h"
16 16
17namespace Vulkan { 17namespace Vulkan {
18 18
19namespace { 19namespace {
20 20
21constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; 21constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
22 22
23constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { 23constexpr VkQueryType GetTarget(VideoCore::QueryType type) {
24 return QUERY_TARGETS[static_cast<std::size_t>(type)]; 24 return QUERY_TARGETS[static_cast<std::size_t>(type)];
25} 25}
26 26
@@ -35,29 +35,34 @@ void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_)
35 type = type_; 35 type = type_;
36} 36}
37 37
38std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { 38std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
39 std::size_t index; 39 std::size_t index;
40 do { 40 do {
41 index = CommitResource(fence); 41 index = CommitResource(fence);
42 } while (usage[index]); 42 } while (usage[index]);
43 usage[index] = true; 43 usage[index] = true;
44 44
45 return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; 45 return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)};
46} 46}
47 47
48void QueryPool::Allocate(std::size_t begin, std::size_t end) { 48void QueryPool::Allocate(std::size_t begin, std::size_t end) {
49 usage.resize(end); 49 usage.resize(end);
50 50
51 const auto dev = device->GetLogical(); 51 VkQueryPoolCreateInfo query_pool_ci;
52 const u32 size = static_cast<u32>(end - begin); 52 query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
53 const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); 53 query_pool_ci.pNext = nullptr;
54 pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); 54 query_pool_ci.flags = 0;
55 query_pool_ci.queryType = GetTarget(type);
56 query_pool_ci.queryCount = static_cast<u32>(end - begin);
57 query_pool_ci.pipelineStatistics = 0;
58 pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci));
55} 59}
56 60
57void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { 61void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
58 const auto it = 62 const auto it =
59 std::find_if(std::begin(pools), std::end(pools), 63 std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) {
60 [query_pool = query.first](auto& pool) { return query_pool == *pool; }); 64 return query_pool == *pool;
65 });
61 ASSERT(it != std::end(pools)); 66 ASSERT(it != std::end(pools));
62 67
63 const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); 68 const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it);
@@ -76,12 +81,11 @@ VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface&
76 81
77VKQueryCache::~VKQueryCache() = default; 82VKQueryCache::~VKQueryCache() = default;
78 83
79std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { 84std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
80 return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); 85 return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
81} 86}
82 87
83void VKQueryCache::Reserve(VideoCore::QueryType type, 88void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) {
84 std::pair<vk::QueryPool, std::uint32_t> query) {
85 query_pools[static_cast<std::size_t>(type)].Reserve(query); 89 query_pools[static_cast<std::size_t>(type)].Reserve(query);
86} 90}
87 91
@@ -89,10 +93,10 @@ HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> depen
89 VideoCore::QueryType type) 93 VideoCore::QueryType type)
90 : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, 94 : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
91 type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { 95 type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
92 const auto dev = cache.Device().GetLogical(); 96 const vk::Device* logical = &cache.Device().GetLogical();
93 cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { 97 cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
94 dev.resetQueryPoolEXT(query.first, query.second, 1, dld); 98 logical->ResetQueryPoolEXT(query.first, query.second, 1);
95 cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); 99 cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT);
96 }); 100 });
97} 101}
98 102
@@ -101,22 +105,16 @@ HostCounter::~HostCounter() {
101} 105}
102 106
103void HostCounter::EndQuery() { 107void HostCounter::EndQuery() {
104 cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { 108 cache.Scheduler().Record(
105 cmdbuf.endQuery(query.first, query.second, dld); 109 [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); });
106 });
107} 110}
108 111
109u64 HostCounter::BlockingQuery() const { 112u64 HostCounter::BlockingQuery() const {
110 if (ticks >= cache.Scheduler().Ticks()) { 113 if (ticks >= cache.Scheduler().Ticks()) {
111 cache.Scheduler().Flush(); 114 cache.Scheduler().Flush();
112 } 115 }
113 116 return cache.Device().GetLogical().GetQueryResult<u64>(
114 const auto dev = cache.Device().GetLogical(); 117 query.first, query.second, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
115 const auto& dld = cache.Device().GetDispatchLoader();
116 u64 value;
117 dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
118 vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
119 return value;
120} 118}
121 119
122} // namespace Vulkan 120} // namespace Vulkan
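The wrapper's GetQueryResult<u64> call replaces the old getQueryPoolResults plumbing; under the hood it has to reduce to a single vkGetQueryPoolResults call. A minimal sketch of that in the raw C API (error handling omitted):

    #include <cstdint>
    #include <vulkan/vulkan.h>

    // Read one 64-bit query result, blocking until the GPU has produced it.
    uint64_t BlockingQueryResult(VkDevice device, VkQueryPool pool, uint32_t query) {
        uint64_t value = 0;
        // WAIT_BIT blocks until the result is available; 64_BIT selects 8-byte
        // results, matching sizeof(value) and the stride argument.
        vkGetQueryPoolResults(device, pool, query, 1, sizeof(value), &value,
                              sizeof(value),
                              VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
        return value;
    }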
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index c3092ee96..b63784f4b 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -12,8 +12,8 @@
12 12
13#include "common/common_types.h" 13#include "common/common_types.h"
14#include "video_core/query_cache.h" 14#include "video_core/query_cache.h"
15#include "video_core/renderer_vulkan/declarations.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h" 15#include "video_core/renderer_vulkan/vk_resource_manager.h"
16#include "video_core/renderer_vulkan/wrapper.h"
17 17
18namespace VideoCore { 18namespace VideoCore {
19class RasterizerInterface; 19class RasterizerInterface;
@@ -36,9 +36,9 @@ public:
36 36
37 void Initialize(const VKDevice& device, VideoCore::QueryType type); 37 void Initialize(const VKDevice& device, VideoCore::QueryType type);
38 38
39 std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); 39 std::pair<VkQueryPool, u32> Commit(VKFence& fence);
40 40
41 void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); 41 void Reserve(std::pair<VkQueryPool, u32> query);
42 42
43protected: 43protected:
44 void Allocate(std::size_t begin, std::size_t end) override; 44 void Allocate(std::size_t begin, std::size_t end) override;
@@ -49,7 +49,7 @@ private:
49 const VKDevice* device = nullptr; 49 const VKDevice* device = nullptr;
50 VideoCore::QueryType type = {}; 50 VideoCore::QueryType type = {};
51 51
52 std::vector<UniqueQueryPool> pools; 52 std::vector<vk::QueryPool> pools;
53 std::vector<bool> usage; 53 std::vector<bool> usage;
54}; 54};
55 55
@@ -61,9 +61,9 @@ public:
61 const VKDevice& device, VKScheduler& scheduler); 61 const VKDevice& device, VKScheduler& scheduler);
62 ~VKQueryCache(); 62 ~VKQueryCache();
63 63
64 std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); 64 std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
65 65
66 void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); 66 void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query);
67 67
68 const VKDevice& Device() const noexcept { 68 const VKDevice& Device() const noexcept {
69 return device; 69 return device;
@@ -91,7 +91,7 @@ private:
91 91
92 VKQueryCache& cache; 92 VKQueryCache& cache;
93 const VideoCore::QueryType type; 93 const VideoCore::QueryType type;
94 const std::pair<vk::QueryPool, std::uint32_t> query; 94 const std::pair<VkQueryPool, u32> query;
95 const u64 ticks; 95 const u64 ticks;
96}; 96};
97 97
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 58c69b786..774ba1f26 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -19,7 +19,6 @@
19#include "core/memory.h" 19#include "core/memory.h"
20#include "video_core/engines/kepler_compute.h" 20#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/declarations.h"
23#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 22#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
24#include "video_core/renderer_vulkan/maxwell_to_vk.h" 23#include "video_core/renderer_vulkan/maxwell_to_vk.h"
25#include "video_core/renderer_vulkan/renderer_vulkan.h" 24#include "video_core/renderer_vulkan/renderer_vulkan.h"
@@ -39,6 +38,7 @@
39#include "video_core/renderer_vulkan/vk_state_tracker.h" 38#include "video_core/renderer_vulkan/vk_state_tracker.h"
40#include "video_core/renderer_vulkan/vk_texture_cache.h" 39#include "video_core/renderer_vulkan/vk_texture_cache.h"
41#include "video_core/renderer_vulkan/vk_update_descriptor.h" 40#include "video_core/renderer_vulkan/vk_update_descriptor.h"
41#include "video_core/renderer_vulkan/wrapper.h"
42 42
43namespace Vulkan { 43namespace Vulkan {
44 44
@@ -60,32 +60,42 @@ namespace {
60 60
61constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); 61constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute);
62 62
63vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { 63VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) {
64 const auto& viewport = regs.viewport_transform[index]; 64 const auto& src = regs.viewport_transform[index];
65 const float x = viewport.translate_x - viewport.scale_x; 65 const float width = src.scale_x * 2.0f;
66 const float y = viewport.translate_y - viewport.scale_y; 66 const float height = src.scale_y * 2.0f;
67 const float width = viewport.scale_x * 2.0f;
68 const float height = viewport.scale_y * 2.0f;
69 67
70 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; 68 VkViewport viewport;
71 float near = viewport.translate_z - viewport.scale_z * reduce_z; 69 viewport.x = src.translate_x - src.scale_x;
72 float far = viewport.translate_z + viewport.scale_z; 70 viewport.y = src.translate_y - src.scale_y;
71 viewport.width = width != 0.0f ? width : 1.0f;
72 viewport.height = height != 0.0f ? height : 1.0f;
73
74 const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
75 viewport.minDepth = src.translate_z - src.scale_z * reduce_z;
76 viewport.maxDepth = src.translate_z + src.scale_z;
73 if (!device.IsExtDepthRangeUnrestrictedSupported()) { 77 if (!device.IsExtDepthRangeUnrestrictedSupported()) {
74 near = std::clamp(near, 0.0f, 1.0f); 78 viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
75 far = std::clamp(far, 0.0f, 1.0f); 79 viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
76 } 80 }
77 81 return viewport;
78 return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far);
79} 82}
80 83
81constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { 84VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
82 const auto& scissor = regs.scissor_test[index]; 85 const auto& src = regs.scissor_test[index];
83 if (!scissor.enable) { 86 VkRect2D scissor;
84 return {{0, 0}, {INT32_MAX, INT32_MAX}}; 87 if (src.enable) {
88 scissor.offset.x = static_cast<s32>(src.min_x);
89 scissor.offset.y = static_cast<s32>(src.min_y);
90 scissor.extent.width = src.max_x - src.min_x;
91 scissor.extent.height = src.max_y - src.min_y;
92 } else {
93 scissor.offset.x = 0;
94 scissor.offset.y = 0;
95 scissor.extent.width = std::numeric_limits<s32>::max();
96 scissor.extent.height = std::numeric_limits<s32>::max();
85 } 97 }
86 const u32 width = scissor.max_x - scissor.min_x; 98 return scissor;
87 const u32 height = scissor.max_y - scissor.min_y;
88 return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}};
89} 99}
90 100
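Maxwell stores viewports as scale/translate pairs around the viewport center, so the conversion above recovers the corner as translate - scale and the extent as 2 * scale, then clamps depth when VK_EXT_depth_range_unrestricted is missing. A self-contained sketch of the same mapping, with ViewportTransform mirroring the register fields used above:

    #include <algorithm>
    #include <vulkan/vulkan.h>

    struct ViewportTransform {
        float scale_x, scale_y, scale_z;
        float translate_x, translate_y, translate_z;
    };

    // depth_zero_to_one == false corresponds to DepthMode::MinusOneToOne.
    VkViewport ToVkViewport(const ViewportTransform& src, bool depth_zero_to_one,
                            bool unrestricted_depth) {
        const float width = src.scale_x * 2.0f;
        const float height = src.scale_y * 2.0f;
        VkViewport vp;
        vp.x = src.translate_x - src.scale_x; // center minus half extent
        vp.y = src.translate_y - src.scale_y;
        vp.width = width != 0.0f ? width : 1.0f;   // zero extents are invalid,
        vp.height = height != 0.0f ? height : 1.0f; // so fall back to 1 pixel
        const float reduce_z = depth_zero_to_one ? 0.0f : 1.0f;
        vp.minDepth = src.translate_z - src.scale_z * reduce_z;
        vp.maxDepth = src.translate_z + src.scale_z;
        if (!unrestricted_depth) {
            // Without VK_EXT_depth_range_unrestricted the depth range must
            // stay inside [0, 1].
            vp.minDepth = std::clamp(vp.minDepth, 0.0f, 1.0f);
            vp.maxDepth = std::clamp(vp.maxDepth, 0.0f, 1.0f);
        }
        return vp;
    }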
91std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( 101std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
@@ -97,8 +107,8 @@ std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
97 return addresses; 107 return addresses;
98} 108}
99 109
100void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage, 110void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage,
101 vk::AccessFlags access) { 111 VkAccessFlags access) {
102 for (auto& [view, layout] : views) { 112 for (auto& [view, layout] : views) {
103 view->Transition(*layout, pipeline_stage, access); 113 view->Transition(*layout, pipeline_stage, access);
104 } 114 }
@@ -127,13 +137,13 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
127 137
128class BufferBindings final { 138class BufferBindings final {
129public: 139public:
130 void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { 140 void AddVertexBinding(const VkBuffer* buffer, VkDeviceSize offset) {
131 vertex.buffer_ptrs[vertex.num_buffers] = buffer; 141 vertex.buffer_ptrs[vertex.num_buffers] = buffer;
132 vertex.offsets[vertex.num_buffers] = offset; 142 vertex.offsets[vertex.num_buffers] = offset;
133 ++vertex.num_buffers; 143 ++vertex.num_buffers;
134 } 144 }
135 145
136 void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { 146 void SetIndexBinding(const VkBuffer* buffer, VkDeviceSize offset, VkIndexType type) {
137 index.buffer = buffer; 147 index.buffer = buffer;
138 index.offset = offset; 148 index.offset = offset;
139 index.type = type; 149 index.type = type;
@@ -217,14 +227,14 @@ private:
217 // Some of these fields are intentionally left uninitialized to avoid initializing them twice. 227 // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
218 struct { 228 struct {
219 std::size_t num_buffers = 0; 229 std::size_t num_buffers = 0;
220 std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs; 230 std::array<const VkBuffer*, Maxwell::NumVertexArrays> buffer_ptrs;
221 std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets; 231 std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
222 } vertex; 232 } vertex;
223 233
224 struct { 234 struct {
225 const vk::Buffer* buffer = nullptr; 235 const VkBuffer* buffer = nullptr;
226 vk::DeviceSize offset; 236 VkDeviceSize offset;
227 vk::IndexType type; 237 VkIndexType type;
228 } index; 238 } index;
229 239
230 template <std::size_t N> 240 template <std::size_t N>
@@ -243,38 +253,35 @@ private:
243 return; 253 return;
244 } 254 }
245 255
246 std::array<vk::Buffer, N> buffers; 256 std::array<VkBuffer, N> buffers;
247 std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), 257 std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(),
248 [](const auto ptr) { return *ptr; }); 258 [](const auto ptr) { return *ptr; });
249 259
250 std::array<vk::DeviceSize, N> offsets; 260 std::array<VkDeviceSize, N> offsets;
251 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); 261 std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
252 262
253 if constexpr (is_indexed) { 263 if constexpr (is_indexed) {
254 // Indexed draw 264 // Indexed draw
255 scheduler.Record([buffers, offsets, index_buffer = *index.buffer, 265 scheduler.Record([buffers, offsets, index_buffer = *index.buffer,
256 index_offset = index.offset, 266 index_offset = index.offset,
257 index_type = index.type](auto cmdbuf, auto& dld) { 267 index_type = index.type](vk::CommandBuffer cmdbuf) {
258 cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); 268 cmdbuf.BindIndexBuffer(index_buffer, index_offset, index_type);
259 cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), 269 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
260 dld);
261 }); 270 });
262 } else { 271 } else {
263 // Array draw 272 // Array draw
264 scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { 273 scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) {
265 cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), 274 cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
266 dld);
267 }); 275 });
268 } 276 }
269 } 277 }
270}; 278};
271 279
272void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, 280void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
273 const vk::DispatchLoaderDynamic& dld) const {
274 if (is_indexed) { 281 if (is_indexed) {
275 cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); 282 cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance);
276 } else { 283 } else {
277 cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld); 284 cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance);
278 } 285 }
279} 286}
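Throughout this commit the recorded lambdas drop the vk::DispatchLoaderDynamic parameter: the wrapper's vk::CommandBuffer carries its own dispatch, so Record callables now take a single argument. A hypothetical RecordQueue standing in for VKScheduler sketches the deferred-recording shape under that assumption:

    #include <functional>
    #include <utility>
    #include <vector>

    namespace vk {
    class CommandBuffer {}; // stub standing in for the wrapper's handle type
    } // namespace vk

    class RecordQueue {
    public:
        // Queue work now; it runs later against the active command buffer.
        template <typename Fn>
        void Record(Fn&& fn) {
            queue.emplace_back(std::forward<Fn>(fn));
        }

        void Replay(vk::CommandBuffer cmdbuf) {
            for (auto& fn : queue) {
                fn(cmdbuf);
            }
            queue.clear();
        }

    private:
        std::vector<std::function<void(vk::CommandBuffer)>> queue;
    };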
280 287
@@ -337,7 +344,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
337 344
338 const auto renderpass = pipeline.GetRenderPass(); 345 const auto renderpass = pipeline.GetRenderPass();
339 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); 346 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
340 scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); 347 scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
341 348
342 UpdateDynamicStates(); 349 UpdateDynamicStates();
343 350
@@ -345,19 +352,19 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
345 352
346 if (device.IsNvDeviceDiagnosticCheckpoints()) { 353 if (device.IsNvDeviceDiagnosticCheckpoints()) {
347 scheduler.Record( 354 scheduler.Record(
348 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); 355 [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(&pipeline); });
349 } 356 }
350 357
351 BeginTransformFeedback(); 358 BeginTransformFeedback();
352 359
353 const auto pipeline_layout = pipeline.GetLayout(); 360 const auto pipeline_layout = pipeline.GetLayout();
354 const auto descriptor_set = pipeline.CommitDescriptorSet(); 361 const auto descriptor_set = pipeline.CommitDescriptorSet();
355 scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { 362 scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
356 if (descriptor_set) { 363 if (descriptor_set) {
357 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, 364 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
358 DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); 365 DESCRIPTOR_SET, descriptor_set, {});
359 } 366 }
360 draw_params.Draw(cmdbuf, dld); 367 draw_params.Draw(cmdbuf);
361 }); 368 });
362 369
363 EndTransformFeedback(); 370 EndTransformFeedback();
@@ -389,48 +396,54 @@ void RasterizerVulkan::Clear() {
389 DEBUG_ASSERT(texceptions.none()); 396 DEBUG_ASSERT(texceptions.none());
390 SetupImageTransitions(0, color_attachments, zeta_attachment); 397 SetupImageTransitions(0, color_attachments, zeta_attachment);
391 398
392 const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); 399 const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0));
393 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); 400 const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
394 scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); 401 scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
395
396 const auto& scissor = regs.scissor_test[0];
397 const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y);
398 vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y};
399 scissor_extent.width = std::min(scissor_extent.width, render_area.width);
400 scissor_extent.height = std::min(scissor_extent.height, render_area.height);
401 402
402 const u32 layer = regs.clear_buffers.layer; 403 VkClearRect clear_rect;
403 const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); 404 clear_rect.baseArrayLayer = regs.clear_buffers.layer;
405 clear_rect.layerCount = 1;
406 clear_rect.rect = GetScissorState(regs, 0);
407 clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width);
408 clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height);
404 409
405 if (use_color) { 410 if (use_color) {
406 const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], 411 VkClearValue clear_value;
407 regs.clear_color[2], regs.clear_color[3]}; 412 std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color));
408 const vk::ClearValue clear_value{clear_color}; 413
409 const u32 color_attachment = regs.clear_buffers.RT; 414 const u32 color_attachment = regs.clear_buffers.RT;
410 scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { 415 scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
411 const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, 416 VkClearAttachment attachment;
412 clear_value); 417 attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
413 cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); 418 attachment.colorAttachment = color_attachment;
419 attachment.clearValue = clear_value;
420 cmdbuf.ClearAttachments(attachment, clear_rect);
414 }); 421 });
415 } 422 }
416 423
417 if (!use_depth && !use_stencil) { 424 if (!use_depth && !use_stencil) {
418 return; 425 return;
419 } 426 }
420 vk::ImageAspectFlags aspect_flags; 427 VkImageAspectFlags aspect_flags = 0;
421 if (use_depth) { 428 if (use_depth) {
422 aspect_flags |= vk::ImageAspectFlagBits::eDepth; 429 aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
423 } 430 }
424 if (use_stencil) { 431 if (use_stencil) {
425 aspect_flags |= vk::ImageAspectFlagBits::eStencil; 432 aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
426 } 433 }
427 434
428 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, 435 scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
429 clear_rect, aspect_flags](auto cmdbuf, auto& dld) { 436 clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
430 const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil); 437 VkClearValue clear_value;
431 const vk::ClearValue clear_value{clear_zeta}; 438 clear_value.depthStencil.depth = clear_depth;
432 const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); 439 clear_value.depthStencil.stencil = clear_stencil;
433 cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); 440
441 VkClearAttachment attachment;
442 attachment.aspectMask = aspect_flags;
443 attachment.colorAttachment = 0;
444 attachment.clearValue.depthStencil.depth = clear_depth;
445 attachment.clearValue.depthStencil.stencil = clear_stencil;
446 cmdbuf.ClearAttachments(attachment, clear_rect);
434 }); 447 });
435} 448}
436 449
@@ -463,24 +476,24 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
463 476
464 buffer_cache.Unmap(); 477 buffer_cache.Unmap();
465 478
466 TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader, 479 TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
467 vk::AccessFlagBits::eShaderRead); 480 VK_ACCESS_SHADER_READ_BIT);
468 TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader, 481 TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
469 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); 482 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
470 483
471 if (device.IsNvDeviceDiagnosticCheckpoints()) { 484 if (device.IsNvDeviceDiagnosticCheckpoints()) {
472 scheduler.Record( 485 scheduler.Record(
473 [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(nullptr, dld); }); 486 [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(nullptr); });
474 } 487 }
475 488
476 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, 489 scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
477 grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), 490 grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
478 layout = pipeline.GetLayout(), 491 layout = pipeline.GetLayout(),
479 descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) { 492 descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) {
480 cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld); 493 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
481 cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1, 494 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET,
482 &descriptor_set, 0, nullptr, dld); 495 descriptor_set, {});
483 cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); 496 cmdbuf.Dispatch(grid_x, grid_y, grid_z);
484 }); 497 });
485} 498}
486 499
@@ -495,20 +508,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
495 508
496void RasterizerVulkan::FlushAll() {} 509void RasterizerVulkan::FlushAll() {}
497 510
498void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { 511void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
512 if (addr == 0 || size == 0) {
513 return;
514 }
499 texture_cache.FlushRegion(addr, size); 515 texture_cache.FlushRegion(addr, size);
500 buffer_cache.FlushRegion(addr, size); 516 buffer_cache.FlushRegion(addr, size);
501 query_cache.FlushRegion(addr, size); 517 query_cache.FlushRegion(addr, size);
502} 518}
503 519
504void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { 520void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
521 if (addr == 0 || size == 0) {
522 return;
523 }
505 texture_cache.InvalidateRegion(addr, size); 524 texture_cache.InvalidateRegion(addr, size);
506 pipeline_cache.InvalidateRegion(addr, size); 525 pipeline_cache.InvalidateRegion(addr, size);
507 buffer_cache.InvalidateRegion(addr, size); 526 buffer_cache.InvalidateRegion(addr, size);
508 query_cache.InvalidateRegion(addr, size); 527 query_cache.InvalidateRegion(addr, size);
509} 528}
510 529
511void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { 530void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
512 FlushRegion(addr, size); 531 FlushRegion(addr, size);
513 InvalidateRegion(addr, size); 532 InvalidateRegion(addr, size);
514} 533}
@@ -540,8 +559,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
540 return false; 559 return false;
541 } 560 }
542 561
543 const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; 562 const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
544 const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)};
545 if (!surface) { 563 if (!surface) {
546 return false; 564 return false;
547 } 565 }
@@ -594,7 +612,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
594 Texceptions texceptions; 612 Texceptions texceptions;
595 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { 613 for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
596 if (update_rendertargets) { 614 if (update_rendertargets) {
597 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); 615 color_attachments[rt] = texture_cache.GetColorBufferSurface(rt);
598 } 616 }
599 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { 617 if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
600 texceptions[rt] = true; 618 texceptions[rt] = true;
@@ -602,7 +620,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
602 } 620 }
603 621
604 if (update_rendertargets) { 622 if (update_rendertargets) {
605 zeta_attachment = texture_cache.GetDepthBufferSurface(true); 623 zeta_attachment = texture_cache.GetDepthBufferSurface();
606 } 624 }
607 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { 625 if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
608 texceptions[ZETA_TEXCEPTION_INDEX] = true; 626 texceptions[ZETA_TEXCEPTION_INDEX] = true;
@@ -620,13 +638,13 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen
620 continue; 638 continue;
621 } 639 }
622 overlap = true; 640 overlap = true;
623 *layout = vk::ImageLayout::eGeneral; 641 *layout = VK_IMAGE_LAYOUT_GENERAL;
624 } 642 }
625 return overlap; 643 return overlap;
626} 644}
627 645
628std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( 646std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
629 vk::RenderPass renderpass) { 647 VkRenderPass renderpass) {
630 FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), 648 FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
631 std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; 649 std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
632 650
@@ -653,15 +671,20 @@ std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffer
653 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); 671 const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
654 auto& framebuffer = fbentry->second; 672 auto& framebuffer = fbentry->second;
655 if (is_cache_miss) { 673 if (is_cache_miss) {
656 const vk::FramebufferCreateInfo framebuffer_ci( 674 VkFramebufferCreateInfo framebuffer_ci;
657 {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width, 675 framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
658 key.height, key.layers); 676 framebuffer_ci.pNext = nullptr;
659 const auto dev = device.GetLogical(); 677 framebuffer_ci.flags = 0;
660 const auto& dld = device.GetDispatchLoader(); 678 framebuffer_ci.renderPass = key.renderpass;
661 framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); 679 framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size());
662 } 680 framebuffer_ci.pAttachments = key.views.data();
663 681 framebuffer_ci.width = key.width;
664 return {*framebuffer, vk::Extent2D{key.width, key.height}}; 682 framebuffer_ci.height = key.height;
683 framebuffer_ci.layers = key.layers;
684 framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci);
685 }
686
687 return {*framebuffer, VkExtent2D{key.width, key.height}};
665} 688}
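The framebuffer cache uses try_emplace so that hit and miss share a single hash lookup: on a miss the mapped value is default-constructed and filled in once. A generic sketch of that pattern, with Key/Handle as hypothetical stand-ins for FramebufferCacheKey and vk::Framebuffer:

    #include <unordered_map>

    template <typename Key, typename Handle, typename Create>
    Handle& GetOrCreate(std::unordered_map<Key, Handle>& cache, const Key& key,
                        Create&& create) {
        const auto [it, is_cache_miss] = cache.try_emplace(key);
        if (is_cache_miss) {
            it->second = create(key); // build the handle only once per key
        }
        return it->second;
    }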
666 689
667RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, 690RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state,
@@ -709,10 +732,9 @@ void RasterizerVulkan::SetupShaderDescriptors(
709void RasterizerVulkan::SetupImageTransitions( 732void RasterizerVulkan::SetupImageTransitions(
710 Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, 733 Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
711 const View& zeta_attachment) { 734 const View& zeta_attachment) {
712 TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, 735 TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT);
713 vk::AccessFlagBits::eShaderRead); 736 TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
714 TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, 737 VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT);
715 vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
716 738
717 for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { 739 for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) {
718 const auto color_attachment = color_attachments[rt]; 740 const auto color_attachment = color_attachments[rt];
@@ -720,19 +742,19 @@ void RasterizerVulkan::SetupImageTransitions(
720 continue; 742 continue;
721 } 743 }
722 const auto image_layout = 744 const auto image_layout =
723 texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; 745 texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
724 color_attachment->Transition( 746 color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
725 image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, 747 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
726 vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); 748 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
727 } 749 }
728 750
729 if (zeta_attachment != nullptr) { 751 if (zeta_attachment != nullptr) {
730 const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] 752 const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX]
731 ? vk::ImageLayout::eGeneral 753 ? VK_IMAGE_LAYOUT_GENERAL
732 : vk::ImageLayout::eDepthStencilAttachmentOptimal; 754 : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
733 zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, 755 zeta_attachment->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
734 vk::AccessFlagBits::eDepthStencilAttachmentRead | 756 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
735 vk::AccessFlagBits::eDepthStencilAttachmentWrite); 757 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT);
736 } 758 }
737} 759}
738 760
@@ -768,9 +790,9 @@ void RasterizerVulkan::BeginTransformFeedback() {
768 const std::size_t size = binding.buffer_size; 790 const std::size_t size = binding.buffer_size;
769 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); 791 const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
770 792
771 scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { 793 scheduler.Record([buffer = *buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
772 cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); 794 cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
773 cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); 795 cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
774 }); 796 });
775} 797}
776 798
@@ -781,7 +803,7 @@ void RasterizerVulkan::EndTransformFeedback() {
781 } 803 }
782 804
783 scheduler.Record( 805 scheduler.Record(
784 [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); 806 [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
785} 807}
786 808
787void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, 809void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
@@ -832,7 +854,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
832 } else { 854 } else {
833 const auto [buffer, offset] = 855 const auto [buffer, offset] =
834 quad_array_pass.Assemble(params.num_vertices, params.base_vertex); 856 quad_array_pass.Assemble(params.num_vertices, params.base_vertex);
835 buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); 857 buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32);
836 params.base_vertex = 0; 858 params.base_vertex = 0;
837 params.num_vertices = params.num_vertices * 6 / 4; 859 params.num_vertices = params.num_vertices * 6 / 4;
838 params.is_indexed = true; 860 params.is_indexed = true;
@@ -1017,7 +1039,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu
1017 update_descriptor_queue.AddSampledImage(sampler, image_view); 1039 update_descriptor_queue.AddSampledImage(sampler, image_view);
1018 1040
1019 const auto image_layout = update_descriptor_queue.GetLastImageLayout(); 1041 const auto image_layout = update_descriptor_queue.GetLastImageLayout();
1020 *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; 1042 *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
1021 sampled_views.push_back(ImageView{std::move(view), image_layout}); 1043 sampled_views.push_back(ImageView{std::move(view), image_layout});
1022} 1044}
1023 1045
@@ -1034,7 +1056,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima
1034 update_descriptor_queue.AddImage(image_view); 1056 update_descriptor_queue.AddImage(image_view);
1035 1057
1036 const auto image_layout = update_descriptor_queue.GetLastImageLayout(); 1058 const auto image_layout = update_descriptor_queue.GetLastImageLayout();
1037 *image_layout = vk::ImageLayout::eGeneral; 1059 *image_layout = VK_IMAGE_LAYOUT_GENERAL;
1038 image_views.push_back(ImageView{std::move(view), image_layout}); 1060 image_views.push_back(ImageView{std::move(view), image_layout});
1039} 1061}
1040 1062
@@ -1051,9 +1073,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
1051 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), 1073 GetViewportState(device, regs, 10), GetViewportState(device, regs, 11),
1052 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), 1074 GetViewportState(device, regs, 12), GetViewportState(device, regs, 13),
1053 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; 1075 GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)};
1054 scheduler.Record([viewports](auto cmdbuf, auto& dld) { 1076 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); });
1055 cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld);
1056 });
1057} 1077}
1058 1078
1059void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { 1079void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1067,9 +1087,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
1067 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), 1087 GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11),
1068 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), 1088 GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14),
1069 GetScissorState(regs, 15)}; 1089 GetScissorState(regs, 15)};
1070 scheduler.Record([scissors](auto cmdbuf, auto& dld) { 1090 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); });
1071 cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld);
1072 });
1073} 1091}
1074 1092
1075void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { 1093void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1077,8 +1095,8 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
1077 return; 1095 return;
1078 } 1096 }
1079 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, 1097 scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp,
1080 factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { 1098 factor = regs.polygon_offset_factor](vk::CommandBuffer cmdbuf) {
1081 cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); 1099 cmdbuf.SetDepthBias(constant, clamp, factor / 2.0f);
1082 }); 1100 });
1083} 1101}
1084 1102
@@ -1088,9 +1106,8 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg
1088 } 1106 }
1089 const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, 1107 const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
1090 regs.blend_color.a}; 1108 regs.blend_color.a};
1091 scheduler.Record([blend_color](auto cmdbuf, auto& dld) { 1109 scheduler.Record(
1092 cmdbuf.setBlendConstants(blend_color.data(), dld); 1110 [blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); });
1093 });
1094} 1111}
1095 1112
1096void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) { 1113void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1098,7 +1115,7 @@ void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs)
1098 return; 1115 return;
1099 } 1116 }
1100 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( 1117 scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
1101 auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); 1118 vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); });
1102} 1119}
1103 1120
1104void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) { 1121void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1111,24 +1128,24 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
1111 [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, 1128 [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask,
1112 front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, 1129 front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref,
1113 back_write_mask = regs.stencil_back_mask, 1130 back_write_mask = regs.stencil_back_mask,
1114 back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { 1131 back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) {
1115 // Front face 1132 // Front face
1116 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); 1133 cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref);
1117 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); 1134 cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask);
1118 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); 1135 cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask);
1119 1136
1120 // Back face 1137 // Back face
1121 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); 1138 cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
1122 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); 1139 cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
1123 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); 1140 cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
1124 }); 1141 });
1125 } else { 1142 } else {
1126 // Front face defines both faces 1143 // Front face defines both faces
1127 scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, 1144 scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask,
1128 test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { 1145 test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) {
1129 cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); 1146 cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref);
1130 cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); 1147 cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask);
1131 cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); 1148 cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask);
1132 }); 1149 });
1133 } 1150 }
1134} 1151}
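The stencil update above keeps the two-path structure: separate front/back state needs two sets of calls, while identical state collapses into one set with VK_STENCIL_FACE_FRONT_AND_BACK. A raw-API sketch of the per-face helper:

    #include <cstdint>
    #include <vulkan/vulkan.h>

    // Update all three dynamic stencil values for the given face mask.
    void SetStencilState(VkCommandBuffer cmdbuf, VkStencilFaceFlags faces,
                         uint32_t reference, uint32_t write_mask,
                         uint32_t compare_mask) {
        vkCmdSetStencilReference(cmdbuf, faces, reference);
        vkCmdSetStencilWriteMask(cmdbuf, faces, write_mask);
        vkCmdSetStencilCompareMask(cmdbuf, faces, compare_mask);
    }

    // With matching front/back state a single call per value suffices:
    // SetStencilState(cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, ref, wm, cm);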
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 3185868e9..46037860a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -17,7 +17,6 @@
17#include "video_core/memory_manager.h" 17#include "video_core/memory_manager.h"
18#include "video_core/rasterizer_accelerated.h" 18#include "video_core/rasterizer_accelerated.h"
19#include "video_core/rasterizer_interface.h" 19#include "video_core/rasterizer_interface.h"
20#include "video_core/renderer_vulkan/declarations.h"
21#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 20#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
22#include "video_core/renderer_vulkan/vk_buffer_cache.h" 21#include "video_core/renderer_vulkan/vk_buffer_cache.h"
23#include "video_core/renderer_vulkan/vk_compute_pass.h" 22#include "video_core/renderer_vulkan/vk_compute_pass.h"
@@ -32,6 +31,7 @@
32#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 31#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
33#include "video_core/renderer_vulkan/vk_texture_cache.h" 32#include "video_core/renderer_vulkan/vk_texture_cache.h"
34#include "video_core/renderer_vulkan/vk_update_descriptor.h" 33#include "video_core/renderer_vulkan/vk_update_descriptor.h"
34#include "video_core/renderer_vulkan/wrapper.h"
35 35
36namespace Core { 36namespace Core {
37class System; 37class System;
@@ -49,11 +49,10 @@ namespace Vulkan {
49 49
50struct VKScreenInfo; 50struct VKScreenInfo;
51 51
52using ImageViewsPack = 52using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>;
53 boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>;
54 53
55struct FramebufferCacheKey { 54struct FramebufferCacheKey {
56 vk::RenderPass renderpass{}; 55 VkRenderPass renderpass{};
57 u32 width = 0; 56 u32 width = 0;
58 u32 height = 0; 57 u32 height = 0;
59 u32 layers = 0; 58 u32 layers = 0;
@@ -101,7 +100,7 @@ class BufferBindings;
101 100
102struct ImageView { 101struct ImageView {
103 View view; 102 View view;
104 vk::ImageLayout* layout = nullptr; 103 VkImageLayout* layout = nullptr;
105}; 104};
106 105
107class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { 106class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
@@ -118,9 +117,9 @@ public:
118 void ResetCounter(VideoCore::QueryType type) override; 117 void ResetCounter(VideoCore::QueryType type) override;
119 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 118 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
120 void FlushAll() override; 119 void FlushAll() override;
121 void FlushRegion(CacheAddr addr, u64 size) override; 120 void FlushRegion(VAddr addr, u64 size) override;
122 void InvalidateRegion(CacheAddr addr, u64 size) override; 121 void InvalidateRegion(VAddr addr, u64 size) override;
123 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 122 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
124 void FlushCommands() override; 123 void FlushCommands() override;
125 void TickFrame() override; 124 void TickFrame() override;
126 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 125 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
@@ -137,7 +136,7 @@ public:
137 136
138private: 137private:
139 struct DrawParameters { 138 struct DrawParameters {
140 void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const; 139 void Draw(vk::CommandBuffer cmdbuf) const;
141 140
142 u32 base_instance = 0; 141 u32 base_instance = 0;
143 u32 num_instances = 0; 142 u32 num_instances = 0;
@@ -154,7 +153,7 @@ private:
154 153
155 Texceptions UpdateAttachments(); 154 Texceptions UpdateAttachments();
156 155
157 std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass); 156 std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
158 157
159 /// Sets up geometry buffers and state. 158 /// Sets up geometry buffers and state.

160 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, 159 DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
@@ -272,7 +271,7 @@ private:
272 u32 draw_counter = 0; 271 u32 draw_counter = 0;
273 272
274 // TODO(Rodrigo): Invalidate on image destruction 273 // TODO(Rodrigo): Invalidate on image destruction
275 std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache; 274 std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache;
276}; 275};
277 276
278} // namespace Vulkan 277} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
index 93f5d7ba0..4e5286a69 100644
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp
@@ -6,10 +6,10 @@
6#include <vector> 6#include <vector>
7 7
8#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_vulkan/declarations.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h" 9#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/renderer_vulkan/vk_device.h" 10#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 11#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
12#include "video_core/renderer_vulkan/wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
@@ -17,7 +17,7 @@ VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {}
17 17
18VKRenderPassCache::~VKRenderPassCache() = default; 18VKRenderPassCache::~VKRenderPassCache() = default;
19 19
20vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { 20VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
21 const auto [pair, is_cache_miss] = cache.try_emplace(params); 21 const auto [pair, is_cache_miss] = cache.try_emplace(params);
22 auto& entry = pair->second; 22 auto& entry = pair->second;
23 if (is_cache_miss) { 23 if (is_cache_miss) {
@@ -26,9 +26,9 @@ vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params)
26 return *entry; 26 return *entry;
27} 27}
28 28
29UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { 29vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
30 std::vector<vk::AttachmentDescription> descriptors; 30 std::vector<VkAttachmentDescription> descriptors;
31 std::vector<vk::AttachmentReference> color_references; 31 std::vector<VkAttachmentReference> color_references;
32 32
33 for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { 33 for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) {
34 const auto attachment = params.color_attachments[rt]; 34 const auto attachment = params.color_attachments[rt];
@@ -39,16 +39,25 @@ UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& par
39 39
40 // TODO(Rodrigo): Add eMayAlias when it's needed. 40 // TODO(Rodrigo): Add eMayAlias when it's needed.
41 const auto color_layout = attachment.is_texception 41 const auto color_layout = attachment.is_texception
42 ? vk::ImageLayout::eGeneral 42 ? VK_IMAGE_LAYOUT_GENERAL
43 : vk::ImageLayout::eColorAttachmentOptimal; 43 : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
44 descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format, 44 VkAttachmentDescription& descriptor = descriptors.emplace_back();
45 vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, 45 descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT;
46 vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, 46 descriptor.format = format.format;
47 vk::AttachmentStoreOp::eDontCare, color_layout, color_layout); 47 descriptor.samples = VK_SAMPLE_COUNT_1_BIT;
48 color_references.emplace_back(static_cast<u32>(rt), color_layout); 48 descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
49 descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
50 descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
51 descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
52 descriptor.initialLayout = color_layout;
53 descriptor.finalLayout = color_layout;
54
55 VkAttachmentReference& reference = color_references.emplace_back();
56 reference.attachment = static_cast<u32>(rt);
57 reference.layout = color_layout;
49 } 58 }
50 59
51 vk::AttachmentReference zeta_attachment_ref; 60 VkAttachmentReference zeta_attachment_ref;
52 if (params.has_zeta) { 61 if (params.has_zeta) {
53 const auto format = 62 const auto format =
54 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); 63 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format);
@@ -56,45 +65,68 @@ UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& par
56 static_cast<u32>(params.zeta_pixel_format)); 65 static_cast<u32>(params.zeta_pixel_format));
57 66
58 const auto zeta_layout = params.zeta_texception 67 const auto zeta_layout = params.zeta_texception
59 ? vk::ImageLayout::eGeneral 68 ? VK_IMAGE_LAYOUT_GENERAL
60 : vk::ImageLayout::eDepthStencilAttachmentOptimal; 69 : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
61 descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format, 70 VkAttachmentDescription& descriptor = descriptors.emplace_back();
62 vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, 71 descriptor.flags = 0;
63 vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad, 72 descriptor.format = format.format;
64 vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout); 73 descriptor.samples = VK_SAMPLE_COUNT_1_BIT;
65 zeta_attachment_ref = 74 descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
66 vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout); 75 descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
76 descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
77 descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
78 descriptor.initialLayout = zeta_layout;
79 descriptor.finalLayout = zeta_layout;
80
81 zeta_attachment_ref.attachment = static_cast<u32>(params.color_attachments.size());
82 zeta_attachment_ref.layout = zeta_layout;
67 } 83 }
68 84
69 const vk::SubpassDescription subpass_description( 85 VkSubpassDescription subpass_description;
70 {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()), 86 subpass_description.flags = 0;
71 color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0, 87 subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
72 nullptr); 88 subpass_description.inputAttachmentCount = 0;
73 89 subpass_description.pInputAttachments = nullptr;
74 vk::AccessFlags access; 90 subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size());
75 vk::PipelineStageFlags stage; 91 subpass_description.pColorAttachments = color_references.data();
92 subpass_description.pResolveAttachments = nullptr;
93 subpass_description.pDepthStencilAttachment = params.has_zeta ? &zeta_attachment_ref : nullptr;
94 subpass_description.preserveAttachmentCount = 0;
95 subpass_description.pPreserveAttachments = nullptr;
96
97 VkAccessFlags access = 0;
98 VkPipelineStageFlags stage = 0;
76 if (!color_references.empty()) { 99 if (!color_references.empty()) {
77 access |= 100 access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
78 vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; 101 stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
79 stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
80 } 102 }
81 103
82 if (params.has_zeta) { 104 if (params.has_zeta) {
83 access |= vk::AccessFlagBits::eDepthStencilAttachmentRead | 105 access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
84 vk::AccessFlagBits::eDepthStencilAttachmentWrite; 106 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
85 stage |= vk::PipelineStageFlagBits::eLateFragmentTests; 107 stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
86 } 108 }
87 109
88 const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access, 110 VkSubpassDependency subpass_dependency;
89 {}); 111 subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
90 112 subpass_dependency.dstSubpass = 0;
91 const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()), 113 subpass_dependency.srcStageMask = stage;
92 descriptors.data(), 1, &subpass_description, 1, 114 subpass_dependency.dstStageMask = stage;
93 &subpass_dependency); 115 subpass_dependency.srcAccessMask = 0;
94 116 subpass_dependency.dstAccessMask = access;
95 const auto dev = device.GetLogical(); 117 subpass_dependency.dependencyFlags = 0;
96 const auto& dld = device.GetDispatchLoader(); 118
97 return dev.createRenderPassUnique(create_info, nullptr, dld); 119 VkRenderPassCreateInfo ci;
120 ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
121 ci.pNext = nullptr;
122 ci.flags = 0;
123 ci.attachmentCount = static_cast<u32>(descriptors.size());
124 ci.pAttachments = descriptors.data();
125 ci.subpassCount = 1;
126 ci.pSubpasses = &subpass_description;
127 ci.dependencyCount = 1;
128 ci.pDependencies = &subpass_dependency;
129 return device.GetLogical().CreateRenderPass(ci);
98} 130}
99 131
100} // namespace Vulkan 132} // namespace Vulkan
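Note the shape of the dependency built above: src and dst use the same stage mask, srcAccessMask stays zero, and only dstAccessMask carries the attachment accesses, serializing last frame's attachment work against this subpass. A sketch of that helper in the raw API:

    #include <vulkan/vulkan.h>

    VkSubpassDependency MakeExternalDependency(VkPipelineStageFlags stage,
                                               VkAccessFlags access) {
        VkSubpassDependency dep{};
        dep.srcSubpass = VK_SUBPASS_EXTERNAL;
        dep.dstSubpass = 0;
        dep.srcStageMask = stage;   // same stages on both sides of the barrier
        dep.dstStageMask = stage;
        dep.srcAccessMask = 0;      // nothing to make visible from the source
        dep.dstAccessMask = access; // reads/writes performed by the subpass
        dep.dependencyFlags = 0;
        return dep;
    }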
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
index b49b2db48..921b6efb5 100644
--- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h
+++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h
@@ -12,7 +12,7 @@
 #include <boost/functional/hash.hpp>
 
 #include "video_core/engines/maxwell_3d.h"
-#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/surface.h"
 
 namespace Vulkan {
@@ -85,13 +85,13 @@ public:
     explicit VKRenderPassCache(const VKDevice& device);
     ~VKRenderPassCache();
 
-    vk::RenderPass GetRenderPass(const RenderPassParams& params);
+    VkRenderPass GetRenderPass(const RenderPassParams& params);
 
 private:
-    UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const;
+    vk::RenderPass CreateRenderPass(const RenderPassParams& params) const;
 
     const VKDevice& device;
-    std::unordered_map<RenderPassParams, UniqueRenderPass> cache;
+    std::unordered_map<RenderPassParams, vk::RenderPass> cache;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
index 525b4bb46..dc06f545a 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
@@ -6,83 +6,83 @@
 #include <optional>
 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
+namespace {
+
 // TODO(Rodrigo): Fine tune these numbers.
 constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
 constexpr std::size_t FENCES_GROW_STEP = 0x40;
 
+VkFenceCreateInfo BuildFenceCreateInfo() {
+    VkFenceCreateInfo fence_ci;
+    fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+    fence_ci.pNext = nullptr;
+    fence_ci.flags = 0;
+    return fence_ci;
+}
+
+} // Anonymous namespace
+
 class CommandBufferPool final : public VKFencedPool {
 public:
     CommandBufferPool(const VKDevice& device)
         : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
 
     void Allocate(std::size_t begin, std::size_t end) override {
-        const auto dev = device.GetLogical();
-        const auto& dld = device.GetDispatchLoader();
-        const u32 graphics_family = device.GetGraphicsFamily();
-
-        auto pool = std::make_unique<Pool>();
-
         // Command buffers are going to be committed, recorded, and executed every single usage
         // cycle. They are also going to be reset when committed.
-        const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient |
-                                vk::CommandPoolCreateFlagBits::eResetCommandBuffer;
-        const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family);
-        pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld);
-
-        const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle,
-                                                      vk::CommandBufferLevel::ePrimary,
-                                                      static_cast<u32>(COMMAND_BUFFER_POOL_SIZE));
-        pool->cmdbufs =
-            dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld);
-
-        pools.push_back(std::move(pool));
+        VkCommandPoolCreateInfo command_pool_ci;
+        command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
+        command_pool_ci.pNext = nullptr;
+        command_pool_ci.flags =
+            VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
+        command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily();
+
+        Pool& pool = pools.emplace_back();
+        pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci);
+        pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
     }
 
-    vk::CommandBuffer Commit(VKFence& fence) {
+    VkCommandBuffer Commit(VKFence& fence) {
         const std::size_t index = CommitResource(fence);
         const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
         const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
-        return *pools[pool_index]->cmdbufs[sub_index];
+        return pools[pool_index].cmdbufs[sub_index];
     }
 
 private:
     struct Pool {
-        UniqueCommandPool handle;
-        std::vector<UniqueCommandBuffer> cmdbufs;
+        vk::CommandPool handle;
+        vk::CommandBuffers cmdbufs;
     };
 
     const VKDevice& device;
-
-    std::vector<std::unique_ptr<Pool>> pools;
+    std::vector<Pool> pools;
 };
 
 VKResource::VKResource() = default;
 
 VKResource::~VKResource() = default;
 
-VKFence::VKFence(const VKDevice& device, UniqueFence handle)
-    : device{device}, handle{std::move(handle)} {}
+VKFence::VKFence(const VKDevice& device)
+    : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {}
 
 VKFence::~VKFence() = default;
 
 void VKFence::Wait() {
-    static constexpr u64 timeout = std::numeric_limits<u64>::max();
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    switch (const auto result = dev.waitForFences(1, &*handle, true, timeout, dld)) {
-    case vk::Result::eSuccess:
+    switch (const VkResult result = handle.Wait()) {
+    case VK_SUCCESS:
         return;
-    case vk::Result::eErrorDeviceLost:
+    case VK_ERROR_DEVICE_LOST:
         device.ReportLoss();
         [[fallthrough]];
     default:
-        vk::throwResultException(result, "vk::waitForFences");
+        throw vk::Exception(result);
     }
 }
 
@@ -107,13 +107,11 @@ bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
         return false;
     }
 
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
     if (gpu_wait) {
         // Wait for the fence if it has been requested.
-        dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld);
+        (void)handle.Wait();
     } else {
-        if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) {
+        if (handle.GetStatus() != VK_SUCCESS) {
             // Vulkan fence is not ready, not much it can do here
             return false;
         }
@@ -126,7 +124,7 @@ bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
     protected_resources.clear();
 
     // Prepare fence for reuse.
-    dev.resetFences({*handle}, dld);
+    handle.Reset();
     is_used = false;
     return true;
 }
@@ -299,21 +297,16 @@ VKFence& VKResourceManager::CommitFence() {
     return *found_fence;
 }
 
-vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
+VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
     return command_buffer_pool->Commit(fence);
 }
 
 void VKResourceManager::GrowFences(std::size_t new_fences_count) {
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    const vk::FenceCreateInfo fence_ci;
-
     const std::size_t previous_size = fences.size();
     fences.resize(previous_size + new_fences_count);
 
-    std::generate(fences.begin() + previous_size, fences.end(), [&]() {
-        return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld));
-    });
+    std::generate(fences.begin() + previous_size, fences.end(),
+                  [this] { return std::make_unique<VKFence>(device); });
}
 
 } // namespace Vulkan
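Commit() maps the flat index handed back by the fenced pool onto a (pool, sub-index) pair. The arithmetic in isolation, using the same constant as the patch but otherwise hypothetical names:

    #include <cstddef>
    #include <cstdio>

    // Standalone model of CommandBufferPool::Commit's index math: the fenced
    // pool hands out a flat index, which is split into the owning pool and
    // the slot inside that pool.
    constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;

    struct PoolSlot {
        std::size_t pool_index;
        std::size_t sub_index;
    };

    constexpr PoolSlot SplitIndex(std::size_t index) {
        return {index / COMMAND_BUFFER_POOL_SIZE, index % COMMAND_BUFFER_POOL_SIZE};
    }

    int main() {
        const PoolSlot slot = SplitIndex(0x1234);
        std::printf("pool=%zu sub=%zu\n", slot.pool_index, slot.sub_index); // pool=1 sub=564
    }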
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
index d4cbc95a5..f683d2276 100644
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ b/src/video_core/renderer_vulkan/vk_resource_manager.h
@@ -7,7 +7,7 @@
 #include <cstddef>
 #include <memory>
 #include <vector>
-#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
@@ -42,7 +42,7 @@ class VKFence {
     friend class VKResourceManager;
 
 public:
-    explicit VKFence(const VKDevice& device, UniqueFence handle);
+    explicit VKFence(const VKDevice& device);
     ~VKFence();
 
     /**
@@ -69,7 +69,7 @@ public:
     void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept;
 
     /// Retrieves the fence.
-    operator vk::Fence() const {
+    operator VkFence() const {
         return *handle;
     }
 
@@ -87,7 +87,7 @@ private:
     bool Tick(bool gpu_wait, bool owner_wait);
 
     const VKDevice& device;                       ///< Device handler
-    UniqueFence handle;                           ///< Vulkan fence
+    vk::Fence handle;                             ///< Vulkan fence
     std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
     bool is_owned = false; ///< The fence has been committed but not released yet.
     bool is_used = false;  ///< The fence has been committed but not yet checked to be free.
@@ -181,7 +181,7 @@ public:
     VKFence& CommitFence();
 
     /// Commits an unused command buffer and protects it with a fence.
-    vk::CommandBuffer CommitCommandBuffer(VKFence& fence);
+    VkCommandBuffer CommitCommandBuffer(VKFence& fence);
 
 private:
     /// Allocates new fences.
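The is_owned and is_used flags above encode the fence recycling protocol. A toy model of how Tick() gates recycling, written as an assumption-level sketch of the documented semantics rather than the emulator's implementation:

    #include <cstdio>

    // Toy fence following the documented protocol: committed fences are
    // "used", owners must release them, and recycling also needs the
    // GPU-side signal. (Sketch only; the real VKFence::Tick additionally
    // distinguishes a gpu_wait mode that blocks on the fence.)
    struct ToyFence {
        bool is_owned = false;     // committed but not yet released by its owner
        bool is_used = false;      // committed and not yet known to be free
        bool gpu_signaled = false; // GPU finished the protected work

        bool Tick(bool owner_wait) {
            if (!is_used) {
                return true; // never committed since the last recycle
            }
            if (is_owned && !owner_wait) {
                return false; // still owned and the caller may not steal it
            }
            if (!gpu_signaled) {
                return false; // protected GPU work is still in flight
            }
            is_used = false; // ready for a new commit
            return true;
        }
    };

    int main() {
        ToyFence fence;
        fence.is_used = true;
        std::printf("%d\n", fence.Tick(false)); // 0: GPU has not signaled yet
        fence.gpu_signaled = true;
        std::printf("%d\n", fence.Tick(false)); // 1: safe to recycle
    }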
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 204b7c39c..07bbcf520 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -7,64 +7,64 @@
 #include <unordered_map>
 
 #include "common/assert.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_sampler_cache.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/textures/texture.h"
 
 namespace Vulkan {
 
-static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) {
+namespace {
+
+VkBorderColor ConvertBorderColor(std::array<float, 4> color) {
     // TODO(Rodrigo): Manage integer border colors
     if (color == std::array<float, 4>{0, 0, 0, 0}) {
-        return vk::BorderColor::eFloatTransparentBlack;
+        return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
     } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
-        return vk::BorderColor::eFloatOpaqueBlack;
+        return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
     } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
-        return vk::BorderColor::eFloatOpaqueWhite;
+        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
+    }
+    if (color[0] + color[1] + color[2] > 1.35f) {
+        // If color elements are brighter than roughly 0.5 average, use white border
+        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
+    } else if (color[3] > 0.5f) {
+        return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
     } else {
-        if (color[0] + color[1] + color[2] > 1.35f) {
-            // If color elements are brighter than roughly 0.5 average, use white border
-            return vk::BorderColor::eFloatOpaqueWhite;
-        }
-        if (color[3] > 0.5f) {
-            return vk::BorderColor::eFloatOpaqueBlack;
-        }
-        return vk::BorderColor::eFloatTransparentBlack;
+        return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
     }
 }
 
+} // Anonymous namespace
+
 VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
 
 VKSamplerCache::~VKSamplerCache() = default;
 
-UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
-    const float max_anisotropy{tsc.GetMaxAnisotropy()};
-    const bool has_anisotropy{max_anisotropy > 1.0f};
-
-    const auto border_color{tsc.GetBorderColor()};
-    const auto vk_border_color{TryConvertBorderColor(border_color)};
-
-    constexpr bool unnormalized_coords{false};
-
-    const vk::SamplerCreateInfo sampler_ci(
-        {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
-        MaxwellToVK::Sampler::Filter(tsc.min_filter),
-        MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
-        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
-        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
-        MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(),
-        has_anisotropy, max_anisotropy, tsc.depth_compare_enabled,
-        MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(),
-        tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
-        unnormalized_coords);
-
-    const auto& dld{device.GetDispatchLoader()};
-    const auto dev{device.GetLogical()};
-    return dev.createSamplerUnique(sampler_ci, nullptr, dld);
+vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
+    VkSamplerCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter);
+    ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter);
+    ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter);
+    ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter);
+    ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter);
+    ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter);
+    ci.mipLodBias = tsc.GetLodBias();
+    ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE;
+    ci.maxAnisotropy = tsc.GetMaxAnisotropy();
+    ci.compareEnable = tsc.depth_compare_enabled;
+    ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func);
+    ci.minLod = tsc.GetMinLod();
+    ci.maxLod = tsc.GetMaxLod();
+    ci.borderColor = ConvertBorderColor(tsc.GetBorderColor());
+    ci.unnormalizedCoordinates = VK_FALSE;
+    return device.GetLogical().CreateSampler(ci);
 }
 
-vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const {
+VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
     return *sampler;
 }
 
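ConvertBorderColor now always returns a value, replacing the old std::optional path with a brightness heuristic. The same logic as a standalone function, with an illustrative driver that is not part of the patch:

    #include <array>
    #include <cstdio>
    #include <vulkan/vulkan_core.h>

    // Standalone copy of the fallback logic above: exact matches first, then
    // a brightness heuristic (a channel sum above 1.35 is roughly a 0.45
    // per-channel average).
    VkBorderColor PickBorderColor(std::array<float, 4> color) {
        if (color == std::array<float, 4>{0, 0, 0, 0}) {
            return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
        } else if (color == std::array<float, 4>{0, 0, 0, 1}) {
            return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
        } else if (color == std::array<float, 4>{1, 1, 1, 1}) {
            return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
        }
        if (color[0] + color[1] + color[2] > 1.35f) {
            return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
        } else if (color[3] > 0.5f) {
            return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
        } else {
            return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
        }
    }

    int main() {
        const bool white =
            PickBorderColor({0.9f, 0.9f, 0.9f, 1.0f}) == VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
        std::printf("%d\n", white); // 1
    }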
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
index 1f73b716b..a33d1c0ee 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -4,7 +4,7 @@
 
 #pragma once
 
-#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/sampler_cache.h"
 #include "video_core/textures/texture.h"
 
@@ -12,15 +12,15 @@ namespace Vulkan {
 
 class VKDevice;
 
-class VKSamplerCache final : public VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> {
+class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> {
 public:
     explicit VKSamplerCache(const VKDevice& device);
     ~VKSamplerCache();
 
 protected:
-    UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
+    vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
 
-    vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override;
+    VkSampler ToSamplerType(const vk::Sampler& sampler) const override;
 
 private:
     const VKDevice& device;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index b61d4fe63..900f551b3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -10,23 +10,22 @@
 
 #include "common/assert.h"
 #include "common/microprofile.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_query_cache.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_state_tracker.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
 MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
 
-void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
-                                           const vk::DispatchLoaderDynamic& dld) {
+void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
     auto command = first;
     while (command != nullptr) {
         auto next = command->GetNext();
-        command->Execute(cmdbuf, dld);
+        command->Execute(cmdbuf);
         command->~Command();
         command = next;
     }
@@ -51,7 +50,7 @@ VKScheduler::~VKScheduler() {
     worker_thread.join();
 }
 
-void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
+void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) {
     SubmitExecution(semaphore);
     if (release_fence) {
         current_fence->Release();
@@ -59,7 +58,7 @@ void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
     AllocateNewContext();
 }
 
-void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
+void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) {
     SubmitExecution(semaphore);
     current_fence->Wait();
     if (release_fence) {
@@ -89,17 +88,34 @@ void VKScheduler::DispatchWork() {
     AcquireNewChunk();
 }
 
-void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
-    if (state.renderpass && renderpass_bi == *state.renderpass) {
+void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer,
+                                    VkExtent2D render_area) {
+    if (renderpass == state.renderpass && framebuffer == state.framebuffer &&
+        render_area.width == state.render_area.width &&
+        render_area.height == state.render_area.height) {
         return;
     }
-    const bool end_renderpass = state.renderpass.has_value();
-    state.renderpass = renderpass_bi;
-    Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) {
+    const bool end_renderpass = state.renderpass != nullptr;
+    state.renderpass = renderpass;
+    state.framebuffer = framebuffer;
+    state.render_area = render_area;
+
+    VkRenderPassBeginInfo renderpass_bi;
+    renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+    renderpass_bi.pNext = nullptr;
+    renderpass_bi.renderPass = renderpass;
+    renderpass_bi.framebuffer = framebuffer;
+    renderpass_bi.renderArea.offset.x = 0;
+    renderpass_bi.renderArea.offset.y = 0;
+    renderpass_bi.renderArea.extent = render_area;
+    renderpass_bi.clearValueCount = 0;
+    renderpass_bi.pClearValues = nullptr;
+
+    Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) {
         if (end_renderpass) {
-            cmdbuf.endRenderPass(dld);
+            cmdbuf.EndRenderPass();
         }
-        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+        cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
     });
 }
 
@@ -107,13 +123,13 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() {
     EndRenderPass();
 }
 
-void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
+void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
     if (state.graphics_pipeline == pipeline) {
         return;
     }
     state.graphics_pipeline = pipeline;
-    Record([pipeline](auto cmdbuf, auto& dld) {
-        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+    Record([pipeline](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
     });
 }
 
@@ -126,37 +142,50 @@ void VKScheduler::WorkerThread() {
         }
         auto extracted_chunk = std::move(chunk_queue.Front());
         chunk_queue.Pop();
-        extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
+        extracted_chunk->ExecuteAll(current_cmdbuf);
        chunk_reserve.Push(std::move(extracted_chunk));
    } while (!quit);
}
 
-void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
     EndPendingOperations();
     InvalidateState();
     WaitWorker();
 
     std::unique_lock lock{mutex};
 
-    const auto queue = device.GetGraphicsQueue();
-    const auto& dld = device.GetDispatchLoader();
-    current_cmdbuf.end(dld);
+    current_cmdbuf.End();
 
-    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
-                                     &semaphore);
-    queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
+    VkSubmitInfo submit_info;
+    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submit_info.pNext = nullptr;
+    submit_info.waitSemaphoreCount = 0;
+    submit_info.pWaitSemaphores = nullptr;
+    submit_info.pWaitDstStageMask = nullptr;
+    submit_info.commandBufferCount = 1;
+    submit_info.pCommandBuffers = current_cmdbuf.address();
+    submit_info.signalSemaphoreCount = semaphore ? 1 : 0;
+    submit_info.pSignalSemaphores = &semaphore;
+    device.GetGraphicsQueue().Submit(submit_info, *current_fence);
 }
 
 void VKScheduler::AllocateNewContext() {
     ++ticks;
 
+    VkCommandBufferBeginInfo cmdbuf_bi;
+    cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
+    cmdbuf_bi.pNext = nullptr;
+    cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
+    cmdbuf_bi.pInheritanceInfo = nullptr;
+
     std::unique_lock lock{mutex};
     current_fence = next_fence;
     next_fence = &resource_manager.CommitFence();
 
-    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
-    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
-                         device.GetDispatchLoader());
+    current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence),
+                                       device.GetDispatchLoader());
+    current_cmdbuf.Begin(cmdbuf_bi);
+
     // Enable counters once again. These are disabled when a command buffer is finished.
     if (query_cache) {
         query_cache->UpdateCounters();
@@ -177,8 +206,8 @@ void VKScheduler::EndRenderPass() {
     if (!state.renderpass) {
         return;
     }
-    state.renderpass = std::nullopt;
-    Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
+    state.renderpass = nullptr;
+    Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); });
 }
 
 void VKScheduler::AcquireNewChunk() {
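The scheduler's Record() machinery type-erases lambdas and replays them later on the worker thread. A self-contained sketch of the same pattern; unlike the emulator's in-place chunk storage, this version heap-allocates for brevity, and all names are illustrative:

    #include <cstdio>
    #include <memory>
    #include <type_traits>
    #include <utility>
    #include <vector>

    // Stand-in for the wrapper's vk::CommandBuffer in this sketch.
    struct FakeCommandBuffer {
        void EndRenderPass() { std::puts("EndRenderPass"); }
    };

    // Type-erased base: every recorded lambda is replayed through Execute().
    class Command {
    public:
        virtual ~Command() = default;
        virtual void Execute(FakeCommandBuffer& cmdbuf) const = 0;
    };

    template <typename T>
    class TypedCommand final : public Command {
    public:
        explicit TypedCommand(T command_) : command{std::move(command_)} {}

        void Execute(FakeCommandBuffer& cmdbuf) const override {
            command(cmdbuf);
        }

    private:
        T command;
    };

    class CommandChunk {
    public:
        template <typename T>
        void Record(T&& command) {
            using Func = std::decay_t<T>;
            commands.push_back(std::make_unique<TypedCommand<Func>>(std::forward<T>(command)));
        }

        void ExecuteAll(FakeCommandBuffer& cmdbuf) {
            for (const auto& command : commands) {
                command->Execute(cmdbuf);
            }
            commands.clear();
        }

    private:
        std::vector<std::unique_ptr<Command>> commands;
    };

    int main() {
        CommandChunk chunk;
        chunk.Record([](FakeCommandBuffer& cmdbuf) { cmdbuf.EndRenderPass(); });
        FakeCommandBuffer cmdbuf;
        chunk.ExecuteAll(cmdbuf); // prints "EndRenderPass"
    }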
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index c7cc291c3..82a8adc69 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -13,7 +13,7 @@
 #include <utility>
 #include "common/common_types.h"
 #include "common/threadsafe_queue.h"
-#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
@@ -49,10 +49,10 @@ public:
     ~VKScheduler();
 
     /// Sends the current execution context to the GPU.
-    void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+    void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr);
 
     /// Sends the current execution context to the GPU and waits for it to complete.
-    void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+    void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr);
 
     /// Waits for the worker thread to finish executing everything. After this function returns it's
     /// safe to touch worker resources.
@@ -62,14 +62,15 @@ public:
     void DispatchWork();
 
     /// Requests to begin a renderpass.
-    void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
+    void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer,
+                           VkExtent2D render_area);
 
     /// Requests the current execution context to be able to execute operations only allowed outside
     /// of a renderpass.
     void RequestOutsideRenderPassOperationContext();
 
     /// Binds a pipeline to the current execution context.
-    void BindGraphicsPipeline(vk::Pipeline pipeline);
+    void BindGraphicsPipeline(VkPipeline pipeline);
 
     /// Assigns the query cache.
     void SetQueryCache(VKQueryCache& query_cache_) {
@@ -101,8 +102,7 @@ private:
     public:
         virtual ~Command() = default;
 
-        virtual void Execute(vk::CommandBuffer cmdbuf,
-                             const vk::DispatchLoaderDynamic& dld) const = 0;
+        virtual void Execute(vk::CommandBuffer cmdbuf) const = 0;
 
         Command* GetNext() const {
             return next;
@@ -125,9 +125,8 @@ private:
         TypedCommand(TypedCommand&&) = delete;
         TypedCommand& operator=(TypedCommand&&) = delete;
 
-        void Execute(vk::CommandBuffer cmdbuf,
-                     const vk::DispatchLoaderDynamic& dld) const override {
-            command(cmdbuf, dld);
+        void Execute(vk::CommandBuffer cmdbuf) const override {
+            command(cmdbuf);
         }
 
     private:
@@ -136,7 +135,7 @@ private:
 
     class CommandChunk final {
     public:
-        void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
+        void ExecuteAll(vk::CommandBuffer cmdbuf);
 
         template <typename T>
         bool Record(T& command) {
@@ -175,7 +174,7 @@ private:
 
     void WorkerThread();
 
-    void SubmitExecution(vk::Semaphore semaphore);
+    void SubmitExecution(VkSemaphore semaphore);
 
     void AllocateNewContext();
 
@@ -198,8 +197,10 @@ private:
     VKFence* next_fence = nullptr;
 
     struct State {
-        std::optional<vk::RenderPassBeginInfo> renderpass;
-        vk::Pipeline graphics_pipeline;
+        VkRenderPass renderpass = nullptr;
+        VkFramebuffer framebuffer = nullptr;
+        VkExtent2D render_area = {0, 0};
+        VkPipeline graphics_pipeline = nullptr;
     } state;
 
     std::unique_ptr<CommandChunk> chunk;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 24d3ca08f..aaa138f52 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -35,7 +35,7 @@ namespace {
 using Sirit::Id;
 using Tegra::Engines::ShaderType;
 using Tegra::Shader::Attribute;
-using Tegra::Shader::AttributeUse;
+using Tegra::Shader::PixelImap;
 using Tegra::Shader::Register;
 using namespace VideoCommon::Shader;
 
@@ -752,16 +752,16 @@ private:
         if (stage != ShaderType::Fragment) {
             continue;
         }
-        switch (header.ps.GetAttributeUse(location)) {
-        case AttributeUse::Constant:
+        switch (header.ps.GetPixelImap(location)) {
+        case PixelImap::Constant:
             Decorate(id, spv::Decoration::Flat);
             break;
-        case AttributeUse::ScreenLinear:
-            Decorate(id, spv::Decoration::NoPerspective);
-            break;
-        case AttributeUse::Perspective:
+        case PixelImap::Perspective:
             // Default
             break;
+        case PixelImap::ScreenLinear:
+            Decorate(id, spv::Decoration::NoPerspective);
+            break;
         default:
             UNREACHABLE_MSG("Unused attribute being fetched");
         }
@@ -801,7 +801,7 @@ private:
         if (IsOutputAttributeArray()) {
             const u32 num = GetNumOutputVertices();
             type = TypeArray(type, Constant(t_uint, num));
-            if (device.GetDriverID() != vk::DriverIdKHR::eIntelProprietaryWindows) {
+            if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) {
                 // Intel's proprietary driver fails to set up defaults for arrayed output
                 // attributes.
                 varying_default = ConstantComposite(type, std::vector(num, varying_default));
@@ -1145,9 +1145,6 @@ private:
         switch (attribute) {
         case Attribute::Index::Position: {
             if (stage == ShaderType::Fragment) {
-                if (element == 3) {
-                    return {Constant(t_float, 1.0f), Type::Float};
-                }
                 return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)),
                         Type::Float};
             }
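The hunk above reorders the switch to match the renamed PixelImap enum; the mapping itself is unchanged. A sketch of the interpolation-to-decoration mapping, where the enum values and helper names are illustrative rather than the emulator's definitions:

    #include <cstdio>

    // Illustrative enum mirroring the renamed Tegra::Shader::PixelImap; the
    // actual definition lives in shader_header.h and is not reproduced here.
    enum class PixelImap { Unused, Constant, Perspective, ScreenLinear };

    // Name of the SPIR-V decoration the decompiler applies for each mode.
    const char* DecorationFor(PixelImap imap) {
        switch (imap) {
        case PixelImap::Constant:
            return "Flat"; // no interpolation across the primitive
        case PixelImap::Perspective:
            return "(default)"; // perspective-correct needs no decoration
        case PixelImap::ScreenLinear:
            return "NoPerspective"; // linear in screen space
        default:
            return "(unreachable: unused attribute fetched)";
        }
    }

    int main() {
        std::printf("%s\n", DecorationFor(PixelImap::ScreenLinear)); // NoPerspective
    }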
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp
index b97c4cb3d..784839327 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp
@@ -8,27 +8,25 @@
8#include "common/alignment.h" 8#include "common/alignment.h"
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/renderer_vulkan/declarations.h"
12#include "video_core/renderer_vulkan/vk_device.h" 11#include "video_core/renderer_vulkan/vk_device.h"
13#include "video_core/renderer_vulkan/vk_shader_util.h" 12#include "video_core/renderer_vulkan/vk_shader_util.h"
13#include "video_core/renderer_vulkan/wrapper.h"
14 14
15namespace Vulkan { 15namespace Vulkan {
16 16
17UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { 17vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) {
18 // Avoid undefined behavior by copying to a staging allocation 18 // Avoid undefined behavior by copying to a staging allocation
19 ASSERT(code_size % sizeof(u32) == 0); 19 ASSERT(code_size % sizeof(u32) == 0);
20 const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); 20 const auto data = std::make_unique<u32[]>(code_size / sizeof(u32));
21 std::memcpy(data.get(), code_data, code_size); 21 std::memcpy(data.get(), code_data, code_size);
22 22
23 const auto dev = device.GetLogical(); 23 VkShaderModuleCreateInfo ci;
24 const auto& dld = device.GetDispatchLoader(); 24 ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
25 const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get()); 25 ci.pNext = nullptr;
26 vk::ShaderModule shader_module; 26 ci.flags = 0;
27 if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) { 27 ci.codeSize = code_size;
28 UNREACHABLE_MSG("Shader module failed to build!"); 28 ci.pCode = data.get();
29 } 29 return device.GetLogical().CreateShaderModule(ci);
30
31 return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld));
32} 30}
33 31
34} // namespace Vulkan 32} // namespace Vulkan
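BuildShader copies the SPIR-V blob into u32 storage because pCode must point at 4-byte-aligned words while the incoming u8 pointer carries no alignment guarantee. The same trick in isolation:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Standalone version of the alignment fix: SPIR-V words must be consumed
    // as u32, but the incoming buffer is raw bytes with no alignment
    // guarantee, so copy it into u32 storage before handing it to pCode.
    std::vector<std::uint32_t> CopySpirv(const std::uint8_t* data, std::size_t size) {
        assert(size % sizeof(std::uint32_t) == 0);
        std::vector<std::uint32_t> code(size / sizeof(std::uint32_t));
        std::memcpy(code.data(), data, size);
        return code;
    }

    int main() {
        const std::uint8_t bytes[8] = {0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00};
        const auto code = CopySpirv(bytes, sizeof(bytes));
        return code[0] == 0x07230203 ? 0 : 1; // SPIR-V magic on little-endian hosts
    }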
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h
index c06d65970..be38d6697 100644
--- a/src/video_core/renderer_vulkan/vk_shader_util.h
+++ b/src/video_core/renderer_vulkan/vk_shader_util.h
@@ -6,12 +6,12 @@
 
 #include <vector>
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
 class VKDevice;
 
-UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
+vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data);
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 374959f82..94d954d7a 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -13,6 +13,7 @@
13#include "video_core/renderer_vulkan/vk_resource_manager.h" 13#include "video_core/renderer_vulkan/vk_resource_manager.h"
14#include "video_core/renderer_vulkan/vk_scheduler.h" 14#include "video_core/renderer_vulkan/vk_scheduler.h"
15#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 15#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
16#include "video_core/renderer_vulkan/wrapper.h"
16 17
17namespace Vulkan { 18namespace Vulkan {
18 19
@@ -71,17 +72,23 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_
71} 72}
72 73
73VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { 74VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
74 const auto usage =
75 vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
76 vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer |
77 vk::BufferUsageFlagBits::eIndexBuffer;
78 const u32 log2 = Common::Log2Ceil64(size); 75 const u32 log2 = Common::Log2Ceil64(size);
79 const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, 76
80 nullptr); 77 VkBufferCreateInfo ci;
81 const auto dev = device.GetLogical(); 78 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
79 ci.pNext = nullptr;
80 ci.flags = 0;
81 ci.size = 1ULL << log2;
82 ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
83 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
84 VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
85 ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
86 ci.queueFamilyIndexCount = 0;
87 ci.pQueueFamilyIndices = nullptr;
88
82 auto buffer = std::make_unique<VKBuffer>(); 89 auto buffer = std::make_unique<VKBuffer>();
83 buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader()); 90 buffer->handle = device.GetLogical().CreateBuffer(ci);
84 buffer->commit = memory_manager.Commit(*buffer->handle, host_visible); 91 buffer->commit = memory_manager.Commit(buffer->handle, host_visible);
85 92
86 auto& entries = GetCache(host_visible)[log2].entries; 93 auto& entries = GetCache(host_visible)[log2].entries;
87 return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer; 94 return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
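Staging buffers are bucketed by the ceiling log2 of the requested size, so any request up to the bucket's power-of-two capacity can reuse the same buffer. A sketch of the sizing math, with C++20 std::bit_width standing in for yuzu's Common::Log2Ceil64:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    // Sketch of the bucket sizing in CreateStagingBuffer. std::bit_width
    // (C++20) stands in for yuzu's Common::Log2Ceil64; both round a size up
    // to the exponent of the next power of two.
    std::uint32_t Log2Ceil64(std::uint64_t value) {
        return value <= 1 ? 0 : static_cast<std::uint32_t>(std::bit_width(value - 1));
    }

    int main() {
        const std::uint64_t request = 5 * 1024 * 1024; // 5 MiB request
        const std::uint32_t log2 = Log2Ceil64(request);
        // bucket=23 size=8388608: the request lands in the 8 MiB bucket.
        std::printf("bucket=%u size=%llu\n", log2, 1ULL << log2);
    }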
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 4d9488f49..a0840ff8c 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -11,9 +11,9 @@
 
 #include "common/common_types.h"
 
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
@@ -22,7 +22,7 @@ class VKFenceWatch;
 class VKScheduler;
 
 struct VKBuffer final {
-    UniqueBuffer handle;
+    vk::Buffer handle;
     VKMemoryCommit commit;
 };
 
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index d48d3b44c..38a93a01a 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -9,11 +9,11 @@
 
 #include "common/alignment.h"
 #include "common/assert.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
@@ -25,8 +25,8 @@ constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024;
 
 std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
-                                  vk::MemoryPropertyFlags wanted) {
-    const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader());
+                                  VkMemoryPropertyFlags wanted) {
+    const auto properties = device.GetPhysical().GetMemoryProperties();
     for (u32 i = 0; i < properties.memoryTypeCount; i++) {
         if (!(filter & (1 << i))) {
             continue;
@@ -35,13 +35,13 @@ std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter,
             return i;
         }
     }
-    return {};
+    return std::nullopt;
 }
 
 } // Anonymous namespace
 
 VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
-                               vk::BufferUsageFlags usage)
+                               VkBufferUsageFlags usage)
     : device{device}, scheduler{scheduler} {
     CreateBuffers(usage);
     ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
@@ -78,17 +78,13 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) {
         invalidated = true;
     }
 
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld));
-    return {pointer, offset, invalidated};
+    return {memory.Map(offset, size), offset, invalidated};
 }
 
 void VKStreamBuffer::Unmap(u64 size) {
     ASSERT_MSG(size <= mapped_size, "Reserved size is too small");
 
-    const auto dev = device.GetLogical();
-    dev.unmapMemory(*memory, device.GetDispatchLoader());
+    memory.Unmap();
 
     offset += size;
 
@@ -101,30 +97,42 @@ void VKStreamBuffer::Unmap(u64 size) {
     watch.fence.Watch(scheduler.GetFence());
 }
 
-void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) {
-    const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive,
-                                         0, nullptr);
-    const auto dev = device.GetLogical();
-    const auto& dld = device.GetDispatchLoader();
-    buffer = dev.createBufferUnique(buffer_ci, nullptr, dld);
+void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
+    VkBufferCreateInfo buffer_ci;
+    buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
+    buffer_ci.pNext = nullptr;
+    buffer_ci.flags = 0;
+    buffer_ci.size = STREAM_BUFFER_SIZE;
+    buffer_ci.usage = usage;
+    buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+    buffer_ci.queueFamilyIndexCount = 0;
+    buffer_ci.pQueueFamilyIndices = nullptr;
+
+    const auto& dev = device.GetLogical();
+    buffer = dev.CreateBuffer(buffer_ci);
 
-    const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld);
+    const auto& dld = device.GetDispatchLoader();
+    const auto requirements = dev.GetBufferMemoryRequirements(*buffer);
     // Prefer device local host visible allocations (this should hit AMD's pinned memory).
-    auto type = FindMemoryType(device, requirements.memoryTypeBits,
-                               vk::MemoryPropertyFlagBits::eHostVisible |
-                                   vk::MemoryPropertyFlagBits::eHostCoherent |
-                                   vk::MemoryPropertyFlagBits::eDeviceLocal);
+    auto type =
+        FindMemoryType(device, requirements.memoryTypeBits,
+                       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                           VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
     if (!type) {
         // Otherwise search for a host visible allocation.
         type = FindMemoryType(device, requirements.memoryTypeBits,
-                              vk::MemoryPropertyFlagBits::eHostVisible |
-                                  vk::MemoryPropertyFlagBits::eHostCoherent);
+                              VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
         ASSERT_MSG(type, "No host visible and coherent memory type found");
     }
-    const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type);
-    memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld);
-
-    dev.bindBufferMemory(*buffer, *memory, 0, dld);
+    VkMemoryAllocateInfo memory_ai;
+    memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+    memory_ai.pNext = nullptr;
+    memory_ai.allocationSize = requirements.size;
+    memory_ai.memoryTypeIndex = *type;
+
+    memory = dev.AllocateMemory(memory_ai);
+    buffer.BindMemory(*memory, 0);
 }
 
 void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
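FindMemoryType scans the device's memory types for one that is allowed by the resource's type filter and contains every wanted property bit. A standalone version fed with synthetic properties; the middle of the original loop sits outside the visible hunk, so the property check below follows the standard Vulkan pattern and is an assumption:

    #include <cstdint>
    #include <cstdio>
    #include <optional>
    #include <vulkan/vulkan_core.h>

    // Standalone sketch of FindMemoryType: pick the first memory type allowed
    // by the type filter whose property flags contain all wanted bits.
    std::optional<std::uint32_t> FindMemoryType(
        const VkPhysicalDeviceMemoryProperties& properties, std::uint32_t filter,
        VkMemoryPropertyFlags wanted) {
        for (std::uint32_t i = 0; i < properties.memoryTypeCount; ++i) {
            if (!(filter & (1u << i))) {
                continue;
            }
            if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) {
                return i;
            }
        }
        return std::nullopt;
    }

    int main() {
        VkPhysicalDeviceMemoryProperties properties{};
        properties.memoryTypeCount = 2;
        properties.memoryTypes[0].propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
        properties.memoryTypes[1].propertyFlags =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
        const auto type = FindMemoryType(properties, 0b11,
                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
        std::printf("type=%u\n", type.value()); // type=1
    }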
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 187c0c612..58ce8b973 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -9,7 +9,7 @@
 #include <vector>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
@@ -21,7 +21,7 @@ class VKScheduler;
 class VKStreamBuffer final {
 public:
     explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
-                            vk::BufferUsageFlags usage);
+                            VkBufferUsageFlags usage);
     ~VKStreamBuffer();
 
     /**
@@ -35,7 +35,7 @@ public:
     /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
     void Unmap(u64 size);
 
-    vk::Buffer GetHandle() const {
+    VkBuffer GetHandle() const {
         return *buffer;
     }
 
@@ -46,20 +46,18 @@ private:
     };
 
     /// Creates Vulkan buffer handles committing the required memory.
-    void CreateBuffers(vk::BufferUsageFlags usage);
+    void CreateBuffers(VkBufferUsageFlags usage);
 
     /// Increases the amount of watches available.
     void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
 
     void WaitPendingOperations(u64 requested_upper_bound);
 
     const VKDevice& device; ///< Vulkan device manager.
     VKScheduler& scheduler; ///< Command scheduler.
-    const vk::AccessFlags access;                ///< Access usage of this stream buffer.
-    const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer.
 
-    UniqueBuffer buffer;       ///< Mapped buffer.
-    UniqueDeviceMemory memory; ///< Memory allocation.
+    vk::Buffer buffer;       ///< Mapped buffer.
+    vk::DeviceMemory memory; ///< Memory allocation.
 
     u64 offset{};      ///< Buffer iterator.
     u64 mapped_size{}; ///< Size reserved for the current copy.
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 9e73fa9cd..bffd8f32a 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -11,69 +11,64 @@
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "core/core.h" 12#include "core/core.h"
13#include "core/frontend/framebuffer_layout.h" 13#include "core/frontend/framebuffer_layout.h"
14#include "video_core/renderer_vulkan/declarations.h"
15#include "video_core/renderer_vulkan/vk_device.h" 14#include "video_core/renderer_vulkan/vk_device.h"
16#include "video_core/renderer_vulkan/vk_resource_manager.h" 15#include "video_core/renderer_vulkan/vk_resource_manager.h"
17#include "video_core/renderer_vulkan/vk_swapchain.h" 16#include "video_core/renderer_vulkan/vk_swapchain.h"
17#include "video_core/renderer_vulkan/wrapper.h"
18 18
19namespace Vulkan { 19namespace Vulkan {
20 20
21namespace { 21namespace {
22 22
23vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats, 23VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) {
24 bool srgb) { 24 if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
25 if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { 25 VkSurfaceFormatKHR format;
26 vk::SurfaceFormatKHR format; 26 format.format = VK_FORMAT_B8G8R8A8_UNORM;
27 format.format = vk::Format::eB8G8R8A8Unorm; 27 format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
28 format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear;
29 return format; 28 return format;
30 } 29 }
31 const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { 30 const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) {
32 const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm; 31 const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM;
33 return format.format == request_format && 32 return format.format == request_format &&
34 format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear; 33 format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
35 }); 34 });
36 return found != formats.end() ? *found : formats[0]; 35 return found != formats.end() ? *found : formats[0];
37} 36}
38 37
39vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) { 38VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) {
40 // Mailbox doesn't lock the application like fifo (vsync), prefer it 39 // Mailbox doesn't lock the application like fifo (vsync), prefer it
41 const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) { 40 const auto found = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR);
42 return mode == vk::PresentModeKHR::eMailbox; 41 return found != modes.end() ? *found : VK_PRESENT_MODE_FIFO_KHR;
43 });
44 return found != modes.end() ? *found : vk::PresentModeKHR::eFifo;
45} 42}
46 43
47vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, 44VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) {
48 u32 height) {
49 constexpr auto undefined_size{std::numeric_limits<u32>::max()}; 45 constexpr auto undefined_size{std::numeric_limits<u32>::max()};
50 if (capabilities.currentExtent.width != undefined_size) { 46 if (capabilities.currentExtent.width != undefined_size) {
51 return capabilities.currentExtent; 47 return capabilities.currentExtent;
52 } 48 }
53 vk::Extent2D extent = {width, height}; 49 VkExtent2D extent;
54 extent.width = std::max(capabilities.minImageExtent.width, 50 extent.width = std::max(capabilities.minImageExtent.width,
55 std::min(capabilities.maxImageExtent.width, extent.width)); 51 std::min(capabilities.maxImageExtent.width, width));
56 extent.height = std::max(capabilities.minImageExtent.height, 52 extent.height = std::max(capabilities.minImageExtent.height,
57 std::min(capabilities.maxImageExtent.height, extent.height)); 53 std::min(capabilities.maxImageExtent.height, height));
58 return extent; 54 return extent;
59} 55}
60 56
61} // Anonymous namespace 57} // Anonymous namespace
62 58
63VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device) 59VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device)
64 : surface{surface}, device{device} {} 60 : surface{surface}, device{device} {}
65 61
66VKSwapchain::~VKSwapchain() = default; 62VKSwapchain::~VKSwapchain() = default;
67 63
68void VKSwapchain::Create(u32 width, u32 height, bool srgb) { 64void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
69 const auto& dld = device.GetDispatchLoader();
70 const auto physical_device = device.GetPhysical(); 65 const auto physical_device = device.GetPhysical();
71 const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)}; 66 const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)};
72 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { 67 if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) {
73 return; 68 return;
74 } 69 }
75 70
76 device.GetLogical().waitIdle(dld); 71 device.GetLogical().WaitIdle();
77 Destroy(); 72 Destroy();
78 73
79 CreateSwapchain(capabilities, width, height, srgb); 74 CreateSwapchain(capabilities, width, height, srgb);
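ChooseSwapPresentMode prefers mailbox for its lower latency without tearing and otherwise falls back to FIFO, the only mode the Vulkan specification guarantees. The selection in isolation, using std::vector instead of the wrapper's vk::Span:

    #include <algorithm>
    #include <cstdio>
    #include <vector>
    #include <vulkan/vulkan_core.h>

    // Standalone version of ChooseSwapPresentMode above.
    VkPresentModeKHR ChoosePresentMode(const std::vector<VkPresentModeKHR>& modes) {
        const auto found = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR);
        return found != modes.end() ? *found : VK_PRESENT_MODE_FIFO_KHR;
    }

    int main() {
        const std::vector<VkPresentModeKHR> modes{VK_PRESENT_MODE_FIFO_KHR,
                                                  VK_PRESENT_MODE_IMMEDIATE_KHR};
        // No mailbox offered, so FIFO is chosen.
        std::printf("%d\n", ChoosePresentMode(modes) == VK_PRESENT_MODE_FIFO_KHR); // 1
    }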
@@ -84,10 +79,8 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
 }
 
 void VKSwapchain::AcquireNextImage() {
-    const auto dev{device.GetLogical()};
-    const auto& dld{device.GetDispatchLoader()};
-    dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
-                            *present_semaphores[frame_index], {}, &image_index, dld);
+    device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
+                                            *present_semaphores[frame_index], {}, &image_index);
 
     if (auto& fence = fences[image_index]; fence) {
         fence->Wait();
@@ -96,29 +89,37 @@ void VKSwapchain::AcquireNextImage() {
     }
 }
 
-bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) {
-    const vk::Semaphore present_semaphore{*present_semaphores[frame_index]};
-    const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore};
-    const u32 wait_semaphore_count{render_semaphore ? 2U : 1U};
-    const auto& dld{device.GetDispatchLoader()};
+bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
+    const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
+    const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
     const auto present_queue{device.GetPresentQueue()};
     bool recreated = false;
 
-    const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1,
-                                          &swapchain.get(), &image_index, {});
-    switch (const auto result = present_queue.presentKHR(&present_info, dld); result) {
-    case vk::Result::eSuccess:
+    VkPresentInfoKHR present_info;
+    present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
+    present_info.pNext = nullptr;
+    present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U;
+    present_info.pWaitSemaphores = semaphores.data();
+    present_info.swapchainCount = 1;
+    present_info.pSwapchains = swapchain.address();
+    present_info.pImageIndices = &image_index;
+    present_info.pResults = nullptr;
+
+    switch (const VkResult result = present_queue.Present(present_info)) {
+    case VK_SUCCESS:
+        break;
+    case VK_SUBOPTIMAL_KHR:
+        LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain");
         break;
-    case vk::Result::eErrorOutOfDateKHR:
+    case VK_ERROR_OUT_OF_DATE_KHR:
         if (current_width > 0 && current_height > 0) {
             Create(current_width, current_height, current_srgb);
             recreated = true;
         }
         break;
     default:
-        LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!",
-                     vk::to_string(result));
-        UNREACHABLE();
+        LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result));
+        break;
     }
 
     ASSERT(fences[image_index] == nullptr);
@@ -132,74 +133,92 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu
132 return framebuffer.width != current_width || framebuffer.height != current_height; 133 return framebuffer.width != current_width || framebuffer.height != current_height;
133} 134}
134 135
135void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, 136void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width,
136 u32 height, bool srgb) { 137 u32 height, bool srgb) {
137 const auto& dld{device.GetDispatchLoader()};
138 const auto physical_device{device.GetPhysical()}; 138 const auto physical_device{device.GetPhysical()};
139 const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)}; 139 const auto formats{physical_device.GetSurfaceFormatsKHR(surface)};
140 const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)}; 140 const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)};
141 141
142 const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; 142 const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)};
143 const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; 143 const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)};
144 144
145 u32 requested_image_count{capabilities.minImageCount + 1}; 145 u32 requested_image_count{capabilities.minImageCount + 1};
146 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { 146 if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) {
147 requested_image_count = capabilities.maxImageCount; 147 requested_image_count = capabilities.maxImageCount;
148 } 148 }
149 149
150 vk::SwapchainCreateInfoKHR swapchain_ci( 150 VkSwapchainCreateInfoKHR swapchain_ci;
151 {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1, 151 swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
152 vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform, 152 swapchain_ci.pNext = nullptr;
153 vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {}); 153 swapchain_ci.flags = 0;
154 swapchain_ci.surface = surface;
155 swapchain_ci.minImageCount = requested_image_count;
156 swapchain_ci.imageFormat = surface_format.format;
157 swapchain_ci.imageColorSpace = surface_format.colorSpace;
158 swapchain_ci.imageArrayLayers = 1;
159 swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
160 swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
161 swapchain_ci.queueFamilyIndexCount = 0;
162 swapchain_ci.pQueueFamilyIndices = nullptr;
163 swapchain_ci.preTransform = capabilities.currentTransform;
164 swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
165 swapchain_ci.presentMode = present_mode;
166 swapchain_ci.clipped = VK_FALSE;
167 swapchain_ci.oldSwapchain = nullptr;
154 168
155 const u32 graphics_family{device.GetGraphicsFamily()}; 169 const u32 graphics_family{device.GetGraphicsFamily()};
156 const u32 present_family{device.GetPresentFamily()}; 170 const u32 present_family{device.GetPresentFamily()};
157 const std::array<u32, 2> queue_indices{graphics_family, present_family}; 171 const std::array<u32, 2> queue_indices{graphics_family, present_family};
158 if (graphics_family != present_family) { 172 if (graphics_family != present_family) {
159 swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent; 173 swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT;
160 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); 174 swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size());
161 swapchain_ci.pQueueFamilyIndices = queue_indices.data(); 175 swapchain_ci.pQueueFamilyIndices = queue_indices.data();
162 } else { 176 } else {
163 swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; 177 swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
164 } 178 }
165 179
166 // Request the size again to reduce the possibility of a TOCTOU race condition. 180 // Request the size again to reduce the possibility of a TOCTOU race condition.
167 const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld); 181 const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface);
168 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); 182 swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height);
169 // Don't add code within this and the swapchain creation. 183 // Don't add code within this and the swapchain creation.
170 const auto dev{device.GetLogical()}; 184 swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci);
171 swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld);
172 185
173 extent = swapchain_ci.imageExtent; 186 extent = swapchain_ci.imageExtent;
174 current_width = extent.width; 187 current_width = extent.width;
175 current_height = extent.height; 188 current_height = extent.height;
176 current_srgb = srgb; 189 current_srgb = srgb;
177 190
178 images = dev.getSwapchainImagesKHR(*swapchain, dld); 191 images = swapchain.GetImages();
179 image_count = static_cast<u32>(images.size()); 192 image_count = static_cast<u32>(images.size());
180 image_format = surface_format.format; 193 image_format = surface_format.format;
181} 194}
182 195
183void VKSwapchain::CreateSemaphores() { 196void VKSwapchain::CreateSemaphores() {
184 const auto dev{device.GetLogical()};
185 const auto& dld{device.GetDispatchLoader()};
186
187 present_semaphores.resize(image_count); 197 present_semaphores.resize(image_count);
188 for (std::size_t i = 0; i < image_count; i++) { 198 std::generate(present_semaphores.begin(), present_semaphores.end(),
189 present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); 199 [this] { return device.GetLogical().CreateSemaphore(); });
190 }
191} 200}
192 201
193void VKSwapchain::CreateImageViews() { 202void VKSwapchain::CreateImageViews() {
194 const auto dev{device.GetLogical()}; 203 VkImageViewCreateInfo ci;
195 const auto& dld{device.GetDispatchLoader()}; 204 ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
205 ci.pNext = nullptr;
206 ci.flags = 0;
207 // ci.image
208 ci.viewType = VK_IMAGE_VIEW_TYPE_2D;
209 ci.format = image_format;
210 ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
211 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
212 ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
213 ci.subresourceRange.baseMipLevel = 0;
214 ci.subresourceRange.levelCount = 1;
215 ci.subresourceRange.baseArrayLayer = 0;
216 ci.subresourceRange.layerCount = 1;
196 217
197 image_views.resize(image_count); 218 image_views.resize(image_count);
198 for (std::size_t i = 0; i < image_count; i++) { 219 for (std::size_t i = 0; i < image_count; i++) {
199 const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D, 220 ci.image = images[i];
200 image_format, {}, 221 image_views[i] = device.GetLogical().CreateImageView(ci);
201 {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1});
202 image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld);
203 } 222 }
204} 223}
205 224
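The VkResult switch in Present() encodes the swapchain's recreation contract: VK_SUBOPTIMAL_KHR is logged and tolerated, while VK_ERROR_OUT_OF_DATE_KHR rebuilds the chain with the last known dimensions. A minimal sketch of the calling side, assuming hypothetical DrawFrame() and GetFence() helpers (in the real renderer this loop is driven from RendererVulkan::SwapBuffers):

    // Sketch only: DrawFrame and GetFence are stand-ins, not the renderer's real API.
    void PresentFrame(VKSwapchain& swapchain, const Layout::FramebufferLayout& layout) {
        if (swapchain.HasFramebufferChanged(layout)) {
            swapchain.Create(layout.width, layout.height, false);
        }
        swapchain.AcquireNextImage();
        const VkSemaphore rendered = DrawFrame(); // signaled once rendering completes
        if (swapchain.Present(rendered, GetFence())) {
            // true means the swapchain was recreated mid-present; any cached
            // framebuffers or image views for the old images are now stale.
        }
    }
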
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index 2f3b2ccd5..a35d61345 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -7,7 +7,7 @@
 #include <vector>
 
 #include "common/common_types.h"
-#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Layout {
 struct FramebufferLayout;
@@ -20,7 +20,7 @@ class VKFence;
 
 class VKSwapchain {
 public:
-    explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device);
+    explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device);
     ~VKSwapchain();
 
     /// Creates (or recreates) the swapchain with a given size.
@@ -31,12 +31,12 @@ public:
 
     /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be
     /// recreated. Takes responsibility for the ownership of fence.
-    bool Present(vk::Semaphore render_semaphore, VKFence& fence);
+    bool Present(VkSemaphore render_semaphore, VKFence& fence);
 
     /// Returns true when the framebuffer layout has changed.
     bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
 
-    const vk::Extent2D& GetSize() const {
+    VkExtent2D GetSize() const {
         return extent;
     }
 
@@ -48,15 +48,15 @@ public:
         return image_index;
     }
 
-    vk::Image GetImageIndex(std::size_t index) const {
+    VkImage GetImageIndex(std::size_t index) const {
         return images[index];
     }
 
-    vk::ImageView GetImageViewIndex(std::size_t index) const {
+    VkImageView GetImageViewIndex(std::size_t index) const {
         return *image_views[index];
     }
 
-    vk::Format GetImageFormat() const {
+    VkFormat GetImageFormat() const {
         return image_format;
     }
 
@@ -65,30 +65,30 @@ public:
     }
 
 private:
-    void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
+    void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height,
                          bool srgb);
     void CreateSemaphores();
     void CreateImageViews();
 
     void Destroy();
 
-    const vk::SurfaceKHR surface;
+    const VkSurfaceKHR surface;
     const VKDevice& device;
 
-    UniqueSwapchainKHR swapchain;
+    vk::SwapchainKHR swapchain;
 
     std::size_t image_count{};
-    std::vector<vk::Image> images;
-    std::vector<UniqueImageView> image_views;
-    std::vector<UniqueFramebuffer> framebuffers;
+    std::vector<VkImage> images;
+    std::vector<vk::ImageView> image_views;
+    std::vector<vk::Framebuffer> framebuffers;
     std::vector<VKFence*> fences;
-    std::vector<UniqueSemaphore> present_semaphores;
+    std::vector<vk::Semaphore> present_semaphores;
 
     u32 image_index{};
     u32 frame_index{};
 
-    vk::Format image_format{};
-    vk::Extent2D extent{};
+    VkFormat image_format{};
+    VkExtent2D extent{};
 
     u32 current_width{};
     u32 current_height{};
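The header now stores vk::SwapchainKHR, vk::ImageView and friends from wrapper.h instead of vulkan.hpp's Unique* handles. The convention, in sketch form (illustrative only; the real vk::Handle template in wrapper.h also tracks the owning VkDevice and dispatch table):

    // Illustrative reduction of the wrapper.h handle convention.
    template <typename T>
    class Handle {
    public:
        Handle() = default;
        explicit Handle(T handle_) : handle{handle_} {}
        Handle(const Handle&) = delete;          // owning, move-only
        ~Handle() { /* vkDestroy* through the parent device */ }

        T operator*() const { return handle; }        // *swapchain -> raw VkSwapchainKHR
        const T* address() const { return &handle; }  // e.g. present_info.pSwapchains

    private:
        T handle = nullptr;
    };

This is why Present() passes swapchain.address() to pSwapchains and the getters return raw Vk* types: raw handles are non-owning copies, while the vk:: members own destruction.
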
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 26175921b..de4c23120 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -17,7 +17,6 @@
17#include "core/memory.h" 17#include "core/memory.h"
18#include "video_core/engines/maxwell_3d.h" 18#include "video_core/engines/maxwell_3d.h"
19#include "video_core/morton.h" 19#include "video_core/morton.h"
20#include "video_core/renderer_vulkan/declarations.h"
21#include "video_core/renderer_vulkan/maxwell_to_vk.h" 20#include "video_core/renderer_vulkan/maxwell_to_vk.h"
22#include "video_core/renderer_vulkan/vk_device.h" 21#include "video_core/renderer_vulkan/vk_device.h"
23#include "video_core/renderer_vulkan/vk_memory_manager.h" 22#include "video_core/renderer_vulkan/vk_memory_manager.h"
@@ -25,6 +24,7 @@
25#include "video_core/renderer_vulkan/vk_scheduler.h" 24#include "video_core/renderer_vulkan/vk_scheduler.h"
26#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 25#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
27#include "video_core/renderer_vulkan/vk_texture_cache.h" 26#include "video_core/renderer_vulkan/vk_texture_cache.h"
27#include "video_core/renderer_vulkan/wrapper.h"
28#include "video_core/surface.h" 28#include "video_core/surface.h"
29#include "video_core/textures/convert.h" 29#include "video_core/textures/convert.h"
30 30
@@ -35,23 +35,22 @@ using VideoCore::MortonSwizzleMode;
35 35
36using Tegra::Texture::SwizzleSource; 36using Tegra::Texture::SwizzleSource;
37using VideoCore::Surface::PixelFormat; 37using VideoCore::Surface::PixelFormat;
38using VideoCore::Surface::SurfaceCompression;
39using VideoCore::Surface::SurfaceTarget; 38using VideoCore::Surface::SurfaceTarget;
40 39
41namespace { 40namespace {
42 41
43vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { 42VkImageType SurfaceTargetToImage(SurfaceTarget target) {
44 switch (target) { 43 switch (target) {
45 case SurfaceTarget::Texture1D: 44 case SurfaceTarget::Texture1D:
46 case SurfaceTarget::Texture1DArray: 45 case SurfaceTarget::Texture1DArray:
47 return vk::ImageType::e1D; 46 return VK_IMAGE_TYPE_1D;
48 case SurfaceTarget::Texture2D: 47 case SurfaceTarget::Texture2D:
49 case SurfaceTarget::Texture2DArray: 48 case SurfaceTarget::Texture2DArray:
50 case SurfaceTarget::TextureCubemap: 49 case SurfaceTarget::TextureCubemap:
51 case SurfaceTarget::TextureCubeArray: 50 case SurfaceTarget::TextureCubeArray:
52 return vk::ImageType::e2D; 51 return VK_IMAGE_TYPE_2D;
53 case SurfaceTarget::Texture3D: 52 case SurfaceTarget::Texture3D:
54 return vk::ImageType::e3D; 53 return VK_IMAGE_TYPE_3D;
55 case SurfaceTarget::TextureBuffer: 54 case SurfaceTarget::TextureBuffer:
56 UNREACHABLE(); 55 UNREACHABLE();
57 return {}; 56 return {};
@@ -60,35 +59,35 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) {
60 return {}; 59 return {};
61} 60}
62 61
63vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { 62VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) {
64 if (pixel_format < PixelFormat::MaxColorFormat) { 63 if (pixel_format < PixelFormat::MaxColorFormat) {
65 return vk::ImageAspectFlagBits::eColor; 64 return VK_IMAGE_ASPECT_COLOR_BIT;
66 } else if (pixel_format < PixelFormat::MaxDepthFormat) { 65 } else if (pixel_format < PixelFormat::MaxDepthFormat) {
67 return vk::ImageAspectFlagBits::eDepth; 66 return VK_IMAGE_ASPECT_DEPTH_BIT;
68 } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { 67 } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) {
69 return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; 68 return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
70 } else { 69 } else {
71 UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format)); 70 UNREACHABLE_MSG("Invalid pixel format={}", static_cast<int>(pixel_format));
72 return vk::ImageAspectFlagBits::eColor; 71 return VK_IMAGE_ASPECT_COLOR_BIT;
73 } 72 }
74} 73}
75 74
76vk::ImageViewType GetImageViewType(SurfaceTarget target) { 75VkImageViewType GetImageViewType(SurfaceTarget target) {
77 switch (target) { 76 switch (target) {
78 case SurfaceTarget::Texture1D: 77 case SurfaceTarget::Texture1D:
79 return vk::ImageViewType::e1D; 78 return VK_IMAGE_VIEW_TYPE_1D;
80 case SurfaceTarget::Texture2D: 79 case SurfaceTarget::Texture2D:
81 return vk::ImageViewType::e2D; 80 return VK_IMAGE_VIEW_TYPE_2D;
82 case SurfaceTarget::Texture3D: 81 case SurfaceTarget::Texture3D:
83 return vk::ImageViewType::e3D; 82 return VK_IMAGE_VIEW_TYPE_3D;
84 case SurfaceTarget::Texture1DArray: 83 case SurfaceTarget::Texture1DArray:
85 return vk::ImageViewType::e1DArray; 84 return VK_IMAGE_VIEW_TYPE_1D_ARRAY;
86 case SurfaceTarget::Texture2DArray: 85 case SurfaceTarget::Texture2DArray:
87 return vk::ImageViewType::e2DArray; 86 return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
88 case SurfaceTarget::TextureCubemap: 87 case SurfaceTarget::TextureCubemap:
89 return vk::ImageViewType::eCube; 88 return VK_IMAGE_VIEW_TYPE_CUBE;
90 case SurfaceTarget::TextureCubeArray: 89 case SurfaceTarget::TextureCubeArray:
91 return vk::ImageViewType::eCubeArray; 90 return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY;
92 case SurfaceTarget::TextureBuffer: 91 case SurfaceTarget::TextureBuffer:
93 break; 92 break;
94 } 93 }
@@ -96,71 +95,88 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) {
96 return {}; 95 return {};
97} 96}
98 97
99UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { 98vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params,
99 std::size_t host_memory_size) {
100 // TODO(Rodrigo): Move texture buffer creation to the buffer cache 100 // TODO(Rodrigo): Move texture buffer creation to the buffer cache
101 const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(), 101 VkBufferCreateInfo ci;
102 vk::BufferUsageFlagBits::eUniformTexelBuffer | 102 ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
103 vk::BufferUsageFlagBits::eTransferSrc | 103 ci.pNext = nullptr;
104 vk::BufferUsageFlagBits::eTransferDst, 104 ci.flags = 0;
105 vk::SharingMode::eExclusive, 0, nullptr); 105 ci.size = static_cast<VkDeviceSize>(host_memory_size);
106 const auto dev = device.GetLogical(); 106 ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
107 const auto& dld = device.GetDispatchLoader(); 107 VK_BUFFER_USAGE_TRANSFER_DST_BIT;
108 return dev.createBufferUnique(buffer_ci, nullptr, dld); 108 ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
109 ci.queueFamilyIndexCount = 0;
110 ci.pQueueFamilyIndices = nullptr;
111 return device.GetLogical().CreateBuffer(ci);
109} 112}
110 113
111vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, 114VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device,
112 const SurfaceParams& params, 115 const SurfaceParams& params, VkBuffer buffer,
113 vk::Buffer buffer) { 116 std::size_t host_memory_size) {
114 ASSERT(params.IsBuffer()); 117 ASSERT(params.IsBuffer());
115 118
116 const auto format = 119 VkBufferViewCreateInfo ci;
117 MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; 120 ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
118 return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes()); 121 ci.pNext = nullptr;
122 ci.flags = 0;
123 ci.buffer = buffer;
124 ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format;
125 ci.offset = 0;
126 ci.range = static_cast<VkDeviceSize>(host_memory_size);
127 return ci;
119} 128}
120 129
121vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { 130VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) {
122 constexpr auto sample_count = vk::SampleCountFlagBits::e1;
123 constexpr auto tiling = vk::ImageTiling::eOptimal;
124
125 ASSERT(!params.IsBuffer()); 131 ASSERT(!params.IsBuffer());
126 132
127 const auto [format, attachable, storage] = 133 const auto [format, attachable, storage] =
128 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); 134 MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format);
129 135
130 auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | 136 VkImageCreateInfo ci;
131 vk::ImageUsageFlagBits::eTransferSrc; 137 ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
138 ci.pNext = nullptr;
139 ci.flags = 0;
140 ci.imageType = SurfaceTargetToImage(params.target);
141 ci.format = format;
142 ci.mipLevels = params.num_levels;
143 ci.arrayLayers = static_cast<u32>(params.GetNumLayers());
144 ci.samples = VK_SAMPLE_COUNT_1_BIT;
145 ci.tiling = VK_IMAGE_TILING_OPTIMAL;
146 ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
147 ci.queueFamilyIndexCount = 0;
148 ci.pQueueFamilyIndices = nullptr;
149 ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
150
151 ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
152 VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
132 if (attachable) { 153 if (attachable) {
133 image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment 154 ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
134 : vk::ImageUsageFlagBits::eColorAttachment; 155 : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
135 } 156 }
136 if (storage) { 157 if (storage) {
137 image_usage |= vk::ImageUsageFlagBits::eStorage; 158 ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
138 } 159 }
139 160
140 vk::ImageCreateFlags flags;
141 vk::Extent3D extent;
142 switch (params.target) { 161 switch (params.target) {
143 case SurfaceTarget::TextureCubemap: 162 case SurfaceTarget::TextureCubemap:
144 case SurfaceTarget::TextureCubeArray: 163 case SurfaceTarget::TextureCubeArray:
145 flags |= vk::ImageCreateFlagBits::eCubeCompatible; 164 ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
146 [[fallthrough]]; 165 [[fallthrough]];
147 case SurfaceTarget::Texture1D: 166 case SurfaceTarget::Texture1D:
148 case SurfaceTarget::Texture1DArray: 167 case SurfaceTarget::Texture1DArray:
149 case SurfaceTarget::Texture2D: 168 case SurfaceTarget::Texture2D:
150 case SurfaceTarget::Texture2DArray: 169 case SurfaceTarget::Texture2DArray:
151 extent = vk::Extent3D(params.width, params.height, 1); 170 ci.extent = {params.width, params.height, 1};
152 break; 171 break;
153 case SurfaceTarget::Texture3D: 172 case SurfaceTarget::Texture3D:
154 extent = vk::Extent3D(params.width, params.height, params.depth); 173 ci.extent = {params.width, params.height, params.depth};
155 break; 174 break;
156 case SurfaceTarget::TextureBuffer: 175 case SurfaceTarget::TextureBuffer:
157 UNREACHABLE(); 176 UNREACHABLE();
158 } 177 }
159 178
160 return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent, 179 return ci;
161 params.num_levels, static_cast<u32>(params.GetNumLayers()),
162 sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0,
163 nullptr, vk::ImageLayout::eUndefined);
164} 180}
165 181
166} // Anonymous namespace 182} // Anonymous namespace
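PixelFormatToImageAspect above classifies formats by comparing against sentinel enumerators, which only works because VideoCore::Surface::PixelFormat groups color, depth, and depth-stencil formats contiguously. A reduced sketch of that layout (names abbreviated; the real enum in video_core/surface.h is far longer):

    enum class PixelFormat {
        ABGR8U,                // ...all color formats come first...
        MaxColorFormat,        // sentinel: anything below is color
        Z32F,                  // ...then depth-only formats...
        MaxDepthFormat,        // sentinel: anything below is color or depth
        Z24S8,                 // ...then combined depth-stencil formats...
        MaxDepthStencilFormat, // sentinel closing the depth-stencil range
    };
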
@@ -169,19 +185,18 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
                              VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
                              VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
                              GPUVAddr gpu_addr, const SurfaceParams& params)
-    : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device},
-      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
-      staging_pool{staging_pool} {
+    : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
+      device{device}, resource_manager{resource_manager},
+      memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
     if (params.IsBuffer()) {
-        buffer = CreateBuffer(device, params);
-        commit = memory_manager.Commit(*buffer, false);
+        buffer = CreateBuffer(device, params, host_memory_size);
+        commit = memory_manager.Commit(buffer, false);
 
-        const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer);
+        const auto buffer_view_ci =
+            GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size);
         format = buffer_view_ci.format;
 
-        const auto dev = device.GetLogical();
-        const auto& dld = device.GetDispatchLoader();
-        buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld);
+        buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci);
     } else {
         const auto image_ci = GenerateImageCreateInfo(device, params);
         format = image_ci.format;
@@ -219,16 +234,15 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
     // We can't copy images to buffers inside a renderpass
     scheduler.RequestOutsideRenderPassOperationContext();
 
-    FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
-                   vk::ImageLayout::eTransferSrcOptimal);
+    FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT,
+                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
 
     const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
     // TODO(Rodrigo): Do this in a single copy
     for (u32 level = 0; level < params.num_levels; ++level) {
-        scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle,
-                          copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) {
-            cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy},
-                                     dld);
+        scheduler.Record([image = *image->GetHandle(), buffer = *buffer.handle,
+                          copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) {
+            cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy);
         });
     }
     scheduler.Finish();
@@ -255,15 +269,27 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) {
     std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
 
     scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer,
-                      size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) {
-        const vk::BufferCopy copy(0, 0, size);
-        cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld);
-
-        cmdbuf.pipelineBarrier(
-            vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {},
-            {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite,
-                                     vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)},
-            {}, dld);
+                      size = host_memory_size](vk::CommandBuffer cmdbuf) {
+        VkBufferCopy copy;
+        copy.srcOffset = 0;
+        copy.dstOffset = 0;
+        copy.size = size;
+        cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
+
+        VkBufferMemoryBarrier barrier;
+        barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+        barrier.pNext = nullptr;
+        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+        barrier.buffer = dst_buffer;
+        barrier.offset = 0;
+        barrier.size = size;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
+                               0, {}, barrier, {});
     });
 }
 
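The barrier recorded in UploadBuffer pairs an execution dependency (the copy must retire before vertex shading) with a memory dependency (transfer writes become visible to shader reads). Stage masks are passed to PipelineBarrier and answer "when"; access masks live in the barrier struct and answer "which memory operations"; assigning one family of constants to the other is a classic validation-layer failure. As an illustration of how the pattern varies, a hypothetical compute-shader consumer would keep the access masks and change only the destination stage:

    // Sketch: same write->read visibility, but for a compute-stage consumer.
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; // writes done by the copy
    barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;    // reads done by the shader
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, barrier, {});
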
@@ -271,46 +297,49 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) {
     const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true);
     std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size);
 
-    FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite,
-                   vk::ImageLayout::eTransferDstOptimal);
+    FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
+                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
 
     for (u32 level = 0; level < params.num_levels; ++level) {
-        vk::BufferImageCopy copy = GetBufferImageCopy(level);
-        if (image->GetAspectMask() ==
-            (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
-            vk::BufferImageCopy depth = copy;
-            vk::BufferImageCopy stencil = copy;
-            depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
-            stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
-            scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth,
-                              stencil](auto cmdbuf, auto& dld) {
-                cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
-                                         {depth, stencil}, dld);
+        const VkBufferImageCopy copy = GetBufferImageCopy(level);
+        if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+            scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(),
+                              copy](vk::CommandBuffer cmdbuf) {
+                std::array<VkBufferImageCopy, 2> copies = {copy, copy};
+                copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+                copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+                cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                                         copies);
             });
         } else {
-            scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(),
-                              copy](auto cmdbuf, auto& dld) {
-                cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal,
-                                         {copy}, dld);
+            scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(),
+                              copy](vk::CommandBuffer cmdbuf) {
+                cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
             });
         }
     }
 }
 
-vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
-    const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
-    const auto compression_type = params.GetCompressionType();
-    const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
-                                       ? params.GetConvertedMipmapOffset(level)
-                                       : params.GetHostMipmapLevelOffset(level);
-
-    return vk::BufferImageCopy(
-        mip_offset, 0, 0,
-        {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0},
-        {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth});
+VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const {
+    VkBufferImageCopy copy;
+    copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted);
+    copy.bufferRowLength = 0;
+    copy.bufferImageHeight = 0;
+    copy.imageSubresource.aspectMask = image->GetAspectMask();
+    copy.imageSubresource.mipLevel = level;
+    copy.imageSubresource.baseArrayLayer = 0;
+    copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers());
+    copy.imageOffset.x = 0;
+    copy.imageOffset.y = 0;
+    copy.imageOffset.z = 0;
+    copy.imageExtent.width = params.GetMipWidth(level);
+    copy.imageExtent.height = params.GetMipHeight(level);
+    copy.imageExtent.depth =
+        params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1;
+    return copy;
 }
 
-vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
+VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const {
     return {image->GetAspectMask(), 0, params.num_levels, 0,
             static_cast<u32>(params.GetNumLayers())};
 }
@@ -322,12 +351,12 @@ CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surf
       aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface},
       base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level},
       num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target)
-                                                           : vk::ImageViewType{}} {}
+                                                           : VK_IMAGE_VIEW_TYPE_1D} {}
 
 CachedSurfaceView::~CachedSurfaceView() = default;
 
-vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
-                                           SwizzleSource z_source, SwizzleSource w_source) {
+VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
+                                         SwizzleSource z_source, SwizzleSource w_source) {
     const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
     if (last_image_view && last_swizzle == swizzle) {
         return last_image_view;
@@ -352,37 +381,45 @@ vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource
 
     // Games can sample depth or stencil values on textures. This is decided by the swizzle value on
     // hardware. To emulate this on Vulkan we specify it in the aspect.
-    vk::ImageAspectFlags aspect = aspect_mask;
-    if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) {
+    VkImageAspectFlags aspect = aspect_mask;
+    if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
         UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
         const bool is_first = x_source == SwizzleSource::R;
         switch (params.pixel_format) {
         case VideoCore::Surface::PixelFormat::Z24S8:
         case VideoCore::Surface::PixelFormat::Z32FS8:
-            aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil;
+            aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT;
             break;
         case VideoCore::Surface::PixelFormat::S8Z24:
-            aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth;
+            aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
             break;
         default:
-            aspect = vk::ImageAspectFlagBits::eDepth;
+            aspect = VK_IMAGE_ASPECT_DEPTH_BIT;
             UNIMPLEMENTED();
         }
 
         // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity
-        swizzle_x = vk::ComponentSwizzle::eR;
-        swizzle_y = vk::ComponentSwizzle::eG;
-        swizzle_z = vk::ComponentSwizzle::eB;
-        swizzle_w = vk::ComponentSwizzle::eA;
+        swizzle_x = VK_COMPONENT_SWIZZLE_R;
+        swizzle_y = VK_COMPONENT_SWIZZLE_G;
+        swizzle_z = VK_COMPONENT_SWIZZLE_B;
+        swizzle_w = VK_COMPONENT_SWIZZLE_A;
     }
 
-    const vk::ImageViewCreateInfo image_view_ci(
-        {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(),
-        {swizzle_x, swizzle_y, swizzle_z, swizzle_w},
-        {aspect, base_level, num_levels, base_layer, num_layers});
+    VkImageViewCreateInfo ci;
+    ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+    ci.pNext = nullptr;
+    ci.flags = 0;
+    ci.image = surface.GetImageHandle();
+    ci.viewType = image_view_type;
+    ci.format = surface.GetImage().GetFormat();
+    ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
+    ci.subresourceRange.aspectMask = aspect;
+    ci.subresourceRange.baseMipLevel = base_level;
+    ci.subresourceRange.levelCount = num_levels;
+    ci.subresourceRange.baseArrayLayer = base_layer;
+    ci.subresourceRange.layerCount = num_layers;
+    image_view = device.GetLogical().CreateImageView(ci);
 
-    const auto dev = device.GetLogical();
-    image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader());
     return last_image_view = *image_view;
 }
@@ -390,8 +427,9 @@ VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterf
                                const VKDevice& device, VKResourceManager& resource_manager,
                                VKMemoryManager& memory_manager, VKScheduler& scheduler,
                                VKStagingBufferPool& staging_pool)
-    : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager},
-      memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {}
+    : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
+      resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
+      staging_pool{staging_pool} {}
 
 VKTextureCache::~VKTextureCache() = default;
 
@@ -418,25 +456,36 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
     scheduler.RequestOutsideRenderPassOperationContext();
 
     src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1,
-                            vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead,
-                            vk::ImageLayout::eTransferSrcOptimal);
-    dst_surface->Transition(
-        dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer,
-        vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal);
+                            VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT,
+                            VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+    dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1,
+                            VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
+                            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
 
-    const vk::ImageSubresourceLayers src_subresource(
-        src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers);
-    const vk::ImageSubresourceLayers dst_subresource(
-        dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers);
-    const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0);
-    const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z);
-    const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z);
-    const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent);
-    const vk::Image src_image = src_surface->GetImageHandle();
-    const vk::Image dst_image = dst_surface->GetImageHandle();
-    scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) {
-        cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
-                         vk::ImageLayout::eTransferDstOptimal, {copy}, dld);
+    VkImageCopy copy;
+    copy.srcSubresource.aspectMask = src_surface->GetAspectMask();
+    copy.srcSubresource.mipLevel = copy_params.source_level;
+    copy.srcSubresource.baseArrayLayer = copy_params.source_z;
+    copy.srcSubresource.layerCount = num_layers;
+    copy.srcOffset.x = copy_params.source_x;
+    copy.srcOffset.y = copy_params.source_y;
+    copy.srcOffset.z = 0;
+    copy.dstSubresource.aspectMask = dst_surface->GetAspectMask();
+    copy.dstSubresource.mipLevel = copy_params.dest_level;
+    copy.dstSubresource.baseArrayLayer = dst_base_layer;
+    copy.dstSubresource.layerCount = num_layers;
+    copy.dstOffset.x = copy_params.dest_x;
+    copy.dstOffset.y = copy_params.dest_y;
+    copy.dstOffset.z = dst_offset_z;
+    copy.extent.width = copy_params.width;
+    copy.extent.height = copy_params.height;
+    copy.extent.depth = extent_z;
+
+    const VkImage src_image = src_surface->GetImageHandle();
+    const VkImage dst_image = dst_surface->GetImageHandle();
+    scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) {
+        cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
+                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
     });
 }
 
@@ -445,25 +494,34 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view,
     // We can't blit inside a renderpass
     scheduler.RequestOutsideRenderPassOperationContext();
 
-    src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer,
-                         vk::AccessFlagBits::eTransferRead);
-    dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer,
-                         vk::AccessFlagBits::eTransferWrite);
+    src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                         VK_ACCESS_TRANSFER_READ_BIT);
+    dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                         VK_ACCESS_TRANSFER_WRITE_BIT);
 
-    const auto& cfg = copy_config;
-    const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0);
-    const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1);
-    const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0);
-    const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1);
-    const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right},
-                             dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right});
+    VkImageBlit blit;
+    blit.srcSubresource = src_view->GetImageSubresourceLayers();
+    blit.srcOffsets[0].x = copy_config.src_rect.left;
+    blit.srcOffsets[0].y = copy_config.src_rect.top;
+    blit.srcOffsets[0].z = 0;
+    blit.srcOffsets[1].x = copy_config.src_rect.right;
+    blit.srcOffsets[1].y = copy_config.src_rect.bottom;
+    blit.srcOffsets[1].z = 1;
+    blit.dstSubresource = dst_view->GetImageSubresourceLayers();
+    blit.dstOffsets[0].x = copy_config.dst_rect.left;
+    blit.dstOffsets[0].y = copy_config.dst_rect.top;
+    blit.dstOffsets[0].z = 0;
+    blit.dstOffsets[1].x = copy_config.dst_rect.right;
+    blit.dstOffsets[1].y = copy_config.dst_rect.bottom;
+    blit.dstOffsets[1].z = 1;
+
     const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;
 
     scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit,
-                      is_linear](auto cmdbuf, auto& dld) {
-        cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image,
-                         vk::ImageLayout::eTransferDstOptimal, {blit},
-                         is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld);
+                      is_linear](vk::CommandBuffer cmdbuf) {
+        cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
+                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit,
+                         is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
     });
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 22e3d34de..115595f28 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -13,10 +13,10 @@
 #include "common/math_util.h"
 #include "video_core/gpu.h"
 #include "video_core/rasterizer_cache.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_image.h"
 #include "video_core/renderer_vulkan/vk_memory_manager.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/texture_cache/surface_base.h"
 #include "video_core/texture_cache/texture_cache.h"
 #include "video_core/textures/decoders.h"
@@ -60,15 +60,15 @@ public:
     void UploadTexture(const std::vector<u8>& staging_buffer) override;
     void DownloadTexture(std::vector<u8>& staging_buffer) override;
 
-    void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
-                        vk::ImageLayout new_layout) {
+    void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
+                        VkImageLayout new_layout) {
         image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels,
                           new_stage_mask, new_access, new_layout);
     }
 
     void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
-                    vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access,
-                    vk::ImageLayout new_layout) {
+                    VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
+                    VkImageLayout new_layout) {
         image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
                           new_access, new_layout);
     }
@@ -81,15 +81,15 @@ public:
         return *image;
     }
 
-    vk::Image GetImageHandle() const {
-        return image->GetHandle();
+    VkImage GetImageHandle() const {
+        return *image->GetHandle();
     }
 
-    vk::ImageAspectFlags GetAspectMask() const {
+    VkImageAspectFlags GetAspectMask() const {
         return image->GetAspectMask();
     }
 
-    vk::BufferView GetBufferViewHandle() const {
+    VkBufferView GetBufferViewHandle() const {
         return *buffer_view;
     }
 
@@ -104,9 +104,9 @@ private:
 
     void UploadImage(const std::vector<u8>& staging_buffer);
 
-    vk::BufferImageCopy GetBufferImageCopy(u32 level) const;
+    VkBufferImageCopy GetBufferImageCopy(u32 level) const;
 
-    vk::ImageSubresourceRange GetImageSubresourceRange() const;
+    VkImageSubresourceRange GetImageSubresourceRange() const;
 
     Core::System& system;
     const VKDevice& device;
@@ -116,11 +116,11 @@ private:
     VKStagingBufferPool& staging_pool;
 
     std::optional<VKImage> image;
-    UniqueBuffer buffer;
-    UniqueBufferView buffer_view;
+    vk::Buffer buffer;
+    vk::BufferView buffer_view;
     VKMemoryCommit commit;
 
-    vk::Format format;
+    VkFormat format = VK_FORMAT_UNDEFINED;
 };
 
 class CachedSurfaceView final : public VideoCommon::ViewBase {
@@ -129,16 +129,16 @@ public:
                       const ViewParams& params, bool is_proxy);
     ~CachedSurfaceView();
 
-    vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
-                            Tegra::Texture::SwizzleSource y_source,
-                            Tegra::Texture::SwizzleSource z_source,
-                            Tegra::Texture::SwizzleSource w_source);
+    VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source,
+                          Tegra::Texture::SwizzleSource y_source,
+                          Tegra::Texture::SwizzleSource z_source,
+                          Tegra::Texture::SwizzleSource w_source);
 
     bool IsSameSurface(const CachedSurfaceView& rhs) const {
         return &surface == &rhs.surface;
     }
 
-    vk::ImageView GetHandle() {
+    VkImageView GetHandle() {
         return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G,
                          Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A);
     }
@@ -159,24 +159,24 @@ public:
         return buffer_view;
     }
 
-    vk::Image GetImage() const {
+    VkImage GetImage() const {
         return image;
     }
 
-    vk::BufferView GetBufferView() const {
+    VkBufferView GetBufferView() const {
         return buffer_view;
     }
 
-    vk::ImageSubresourceRange GetImageSubresourceRange() const {
+    VkImageSubresourceRange GetImageSubresourceRange() const {
         return {aspect_mask, base_level, num_levels, base_layer, num_layers};
     }
 
-    vk::ImageSubresourceLayers GetImageSubresourceLayers() const {
+    VkImageSubresourceLayers GetImageSubresourceLayers() const {
         return {surface.GetAspectMask(), base_level, base_layer, num_layers};
     }
 
-    void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask,
-                    vk::AccessFlags new_access) const {
+    void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask,
+                    VkAccessFlags new_access) const {
         surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask,
                            new_access, new_layout);
     }
@@ -196,9 +196,9 @@ private:
 
     // Store a copy of these values to avoid double dereference when reading them
     const SurfaceParams params;
-    const vk::Image image;
-    const vk::BufferView buffer_view;
-    const vk::ImageAspectFlags aspect_mask;
+    const VkImage image;
+    const VkBufferView buffer_view;
+    const VkImageAspectFlags aspect_mask;
 
     const VKDevice& device;
     CachedSurface& surface;
@@ -206,12 +206,12 @@ private:
     const u32 num_layers;
     const u32 base_level;
     const u32 num_levels;
-    const vk::ImageViewType image_view_type;
+    const VkImageViewType image_view_type;
 
-    vk::ImageView last_image_view;
-    u32 last_swizzle{};
+    VkImageView last_image_view = nullptr;
+    u32 last_swizzle = 0;
 
-    std::unordered_map<u32, UniqueImageView> view_cache;
+    std::unordered_map<u32, vk::ImageView> view_cache;
 };
 
 class VKTextureCache final : public TextureCacheBase {
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
index 0e577b9ff..4bfec0077 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp
@@ -7,10 +7,10 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
-#include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
+#include "video_core/renderer_vulkan/wrapper.h"
 
 namespace Vulkan {
 
@@ -27,8 +27,8 @@ void VKUpdateDescriptorQueue::Acquire() {
     entries.clear();
 }
 
-void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template,
-                                   vk::DescriptorSet set) {
+void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template,
+                                   VkDescriptorSet set) {
     if (payload.size() + entries.size() >= payload.max_size()) {
         LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
         scheduler.WaitWorker();
@@ -37,21 +37,21 @@ void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template,
 
     const auto payload_start = payload.data() + payload.size();
     for (const auto& entry : entries) {
-        if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) {
+        if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
             payload.push_back(*image);
         } else if (const auto buffer = std::get_if<Buffer>(&entry)) {
             payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size);
-        } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) {
+        } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
             payload.push_back(*texel);
         } else {
             UNREACHABLE();
         }
     }
 
-    scheduler.Record([dev = device.GetLogical(), payload_start, set,
-                      update_template]([[maybe_unused]] auto cmdbuf, auto& dld) {
-        dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld);
-    });
+    scheduler.Record(
+        [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) {
+            logical->UpdateDescriptorSet(set, update_template, payload_start);
+        });
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h
index 8c825aa29..a9e3d5dba 100644
--- a/src/video_core/renderer_vulkan/vk_update_descriptor.h
+++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h
@@ -9,7 +9,7 @@
9#include <boost/container/static_vector.hpp> 9#include <boost/container/static_vector.hpp>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "video_core/renderer_vulkan/declarations.h" 12#include "video_core/renderer_vulkan/wrapper.h"
13 13
14namespace Vulkan { 14namespace Vulkan {
15 15
@@ -20,18 +20,18 @@ class DescriptorUpdateEntry {
20public: 20public:
21 explicit DescriptorUpdateEntry() : image{} {} 21 explicit DescriptorUpdateEntry() : image{} {}
22 22
23 DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {} 23 DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
24 24
25 DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size) 25 DescriptorUpdateEntry(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size)
26 : buffer{buffer, offset, size} {} 26 : buffer{buffer, offset, size} {}
27 27
28 DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {} 28 DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
29 29
30private: 30private:
31 union { 31 union {
32 vk::DescriptorImageInfo image; 32 VkDescriptorImageInfo image;
33 vk::DescriptorBufferInfo buffer; 33 VkDescriptorBufferInfo buffer;
34 vk::BufferView texel_buffer; 34 VkBufferView texel_buffer;
35 }; 35 };
36}; 36};
37 37
@@ -44,37 +44,35 @@ public:
44 44
45 void Acquire(); 45 void Acquire();
46 46
47 void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set); 47 void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set);
48 48
49 void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) { 49 void AddSampledImage(VkSampler sampler, VkImageView image_view) {
50 entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}}); 50 entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}});
51 } 51 }
52 52
53 void AddImage(vk::ImageView image_view) { 53 void AddImage(VkImageView image_view) {
54 entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}}); 54 entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
55 } 55 }
56 56
57 void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) { 57 void AddBuffer(const VkBuffer* buffer, u64 offset, std::size_t size) {
58 entries.push_back(Buffer{buffer, offset, size}); 58 entries.push_back(Buffer{buffer, offset, size});
59 } 59 }
60 60
61 void AddTexelBuffer(vk::BufferView texel_buffer) { 61 void AddTexelBuffer(VkBufferView texel_buffer) {
62 entries.emplace_back(texel_buffer); 62 entries.emplace_back(texel_buffer);
63 } 63 }
64 64
65 vk::ImageLayout* GetLastImageLayout() { 65 VkImageLayout* GetLastImageLayout() {
66 return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout; 66 return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout;
67 } 67 }
68 68
69private: 69private:
70 struct Buffer { 70 struct Buffer {
71 const vk::Buffer* buffer{}; 71 const VkBuffer* buffer = nullptr;
72 u64 offset{}; 72 u64 offset = 0;
73 std::size_t size{}; 73 std::size_t size = 0;
74 }; 74 };
75 using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>; 75 using Variant = std::variant<VkDescriptorImageInfo, Buffer, VkBufferView>;
76 // Old gcc versions don't consider this trivially copyable.
77 // static_assert(std::is_trivially_copyable_v<Variant>);
78 76
79 const VKDevice& device; 77 const VKDevice& device;
80 VKScheduler& scheduler; 78 VKScheduler& scheduler;
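For context on the class above: Send() flattens heterogeneous descriptor entries into one contiguous payload that the descriptor update template later walks with a fixed stride, which is why the union members must share one slot layout. A standalone sketch of that flattening idea, using illustrative types rather than the emulator's or Vulkan's own:

#include <cstdint>
#include <variant>
#include <vector>

struct ImageInfo  { std::uint64_t sampler, view; std::uint32_t layout; };
struct BufferInfo { std::uint64_t buffer, offset, size; };
struct TexelView  { std::uint64_t view; };

using Entry = std::variant<ImageInfo, BufferInfo, TexelView>;

union PayloadSlot { // one fixed-size slot per descriptor, like the union above
    ImageInfo image;
    BufferInfo buffer;
    TexelView texel;
};

void Flatten(const std::vector<Entry>& entries, std::vector<PayloadSlot>& payload) {
    for (const Entry& entry : entries) {
        PayloadSlot slot{};
        if (const auto* image = std::get_if<ImageInfo>(&entry)) {
            slot.image = *image;
        } else if (const auto* buffer = std::get_if<BufferInfo>(&entry)) {
            slot.buffer = *buffer;
        } else {
            slot.texel = std::get<TexelView>(entry);
        }
        payload.push_back(slot);
    }
}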
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 478394682..4db329fa5 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
136 SetRegister(bb, instr.gpr0, value); 136 SetRegister(bb, instr.gpr0, value);
137 break; 137 break;
138 } 138 }
139 case OpCode::Id::FCMP_R: { 139 case OpCode::Id::FCMP_RR:
140 case OpCode::Id::FCMP_RC: {
140 UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); 141 UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
141 Node op_c = GetRegister(instr.gpr39); 142 Node op_c = GetRegister(instr.gpr39);
142 Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); 143 Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index c72690b2b..b9989c88c 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -2,6 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <limits>
6#include <optional>
7#include <utility>
8
5#include "common/assert.h" 9#include "common/assert.h"
6#include "common/common_types.h" 10#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h" 11#include "video_core/engines/shader_bytecode.h"
@@ -15,9 +19,49 @@ using Tegra::Shader::OpCode;
15using Tegra::Shader::Register; 19using Tegra::Shader::Register;
16 20
17namespace { 21namespace {
22
18constexpr OperationCode GetFloatSelector(u64 selector) { 23constexpr OperationCode GetFloatSelector(u64 selector) {
19 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; 24 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
20} 25}
26
27constexpr u32 SizeInBits(Register::Size size) {
28 switch (size) {
29 case Register::Size::Byte:
30 return 8;
31 case Register::Size::Short:
32 return 16;
33 case Register::Size::Word:
34 return 32;
35 case Register::Size::Long:
36 return 64;
37 }
38 return 0;
39}
40
41constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
42 Register::Size dst_size,
43 bool src_signed,
44 bool dst_signed) {
45 const u32 dst_bits = SizeInBits(dst_size);
46 if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
47 if (src_signed == dst_signed) {
48 return std::nullopt;
49 }
50 return std::make_pair(0, std::numeric_limits<s32>::max());
51 }
52 if (dst_signed) {
53 // Signed destination, clamp to [-128, 127] for instance
54 return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
55 } else {
56 // Unsigned destination
57 if (dst_bits == 32) {
58 // Avoid shifting by 32, that is undefined behavior
59 return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
60 }
61 return std::make_pair(0, (1 << dst_bits) - 1);
62 }
63}
64
21} // Anonymous namespace 65} // Anonymous namespace
22 66
23u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { 67u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
@@ -28,14 +72,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
28 case OpCode::Id::I2I_R: 72 case OpCode::Id::I2I_R:
29 case OpCode::Id::I2I_C: 73 case OpCode::Id::I2I_C:
30 case OpCode::Id::I2I_IMM: { 74 case OpCode::Id::I2I_IMM: {
31 UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); 75 const bool src_signed = instr.conversion.is_input_signed;
32 UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); 76 const bool dst_signed = instr.conversion.is_output_signed;
33 UNIMPLEMENTED_IF(instr.alu.saturate_d); 77 const Register::Size src_size = instr.conversion.src_size;
78 const Register::Size dst_size = instr.conversion.dst_size;
79 const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
34 80
35 const bool input_signed = instr.conversion.is_input_signed; 81 Node value = [this, instr, opcode] {
36 const bool output_signed = instr.conversion.is_output_signed;
37
38 Node value = [&]() {
39 switch (opcode->get().GetId()) { 82 switch (opcode->get().GetId()) {
40 case OpCode::Id::I2I_R: 83 case OpCode::Id::I2I_R:
41 return GetRegister(instr.gpr20); 84 return GetRegister(instr.gpr20);
@@ -48,16 +91,60 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
48 return Immediate(0); 91 return Immediate(0);
49 } 92 }
50 }(); 93 }();
51 value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
52 94
53 value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, 95 // Ensure the source selector is valid
54 input_signed); 96 switch (instr.conversion.src_size) {
55 if (input_signed != output_signed) { 97 case Register::Size::Byte:
56 value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); 98 break;
99 case Register::Size::Short:
100 ASSERT(selector == 0 || selector == 2);
101 break;
102 default:
103 ASSERT(selector == 0);
104 break;
105 }
106
107 if (src_size != Register::Size::Word || selector != 0) {
108 value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
109 Immediate(selector * 8), Immediate(SizeInBits(src_size)));
110 }
111
112 value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
113 instr.conversion.negate_a, src_signed);
114
115 if (instr.alu.saturate_d) {
116 if (src_signed && !dst_signed) {
117 Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
118 Immediate(1 << (SizeInBits(src_size) - 1)));
119 value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
120 std::move(value));
121
122 // Simplify generated expressions; this can be removed without semantic impact
123 SetTemporary(bb, 0, std::move(value));
124 value = GetTemporary(0);
125
126 if (dst_size != Register::Size::Word) {
127 const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
128 Node is_large =
129 Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
130 value = Operation(OperationCode::Select, std::move(is_large), limit,
131 std::move(value));
132 }
133 } else if (const std::optional bounds =
134 IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
135 value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
136 Immediate(bounds->first));
137 value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
138 Immediate(bounds->second));
139 }
140 } else if (dst_size != Register::Size::Word) {
141 // No saturation, we only have to mask the result
142 Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
143 value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
57 } 144 }
58 145
59 SetInternalFlagsFromInteger(bb, value, instr.generates_cc); 146 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
60 SetRegister(bb, instr.gpr0, value); 147 SetRegister(bb, instr.gpr0, std::move(value));
61 break; 148 break;
62 } 149 }
63 case OpCode::Id::I2F_R: 150 case OpCode::Id::I2F_R:
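A worked check of the clamp ranges IntegerSaturateBounds produces, restated standalone (this mirrors the arithmetic above; it is not the emulator's code):

int main() {
    constexpr unsigned dst_bits = 8; // Register::Size::Byte
    // Signed byte destination: [-(1 << 7), (1 << 7) - 1] = [-128, 127]
    static_assert(-(1 << (dst_bits - 1)) == -128);
    static_assert((1 << (dst_bits - 1)) - 1 == 127);
    // Unsigned byte destination: [0, (1 << 8) - 1] = [0, 255]
    static_assert((1 << dst_bits) - 1 == 255);
    // An unsigned 32-bit destination cannot evaluate (1 << 32) - 1, which
    // is undefined behavior; the function substitutes
    // std::numeric_limits<u32>::max() for that case, as its comment notes.
}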
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index d2fe4ec5d..0dd7a1196 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -13,13 +13,247 @@
13#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h" 14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h" 15#include "video_core/shader/shader_ir.h"
16#include "video_core/textures/texture.h"
16 17
17namespace VideoCommon::Shader { 18namespace VideoCommon::Shader {
18 19
19using Tegra::Shader::Instruction; 20using Tegra::Shader::Instruction;
20using Tegra::Shader::OpCode; 21using Tegra::Shader::OpCode;
22using Tegra::Shader::PredCondition;
23using Tegra::Shader::StoreType;
24using Tegra::Texture::ComponentType;
25using Tegra::Texture::TextureFormat;
26using Tegra::Texture::TICEntry;
21 27
22namespace { 28namespace {
29
30ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
31 std::size_t component) {
32 const TextureFormat format{descriptor.format};
33 switch (format) {
34 case TextureFormat::R16_G16_B16_A16:
35 case TextureFormat::R32_G32_B32_A32:
36 case TextureFormat::R32_G32_B32:
37 case TextureFormat::R32_G32:
38 case TextureFormat::R16_G16:
39 case TextureFormat::R32:
40 case TextureFormat::R16:
41 case TextureFormat::R8:
42 case TextureFormat::R1:
43 if (component == 0) {
44 return descriptor.r_type;
45 }
46 if (component == 1) {
47 return descriptor.g_type;
48 }
49 if (component == 2) {
50 return descriptor.b_type;
51 }
52 if (component == 3) {
53 return descriptor.a_type;
54 }
55 break;
56 case TextureFormat::A8R8G8B8:
57 if (component == 0) {
58 return descriptor.a_type;
59 }
60 if (component == 1) {
61 return descriptor.r_type;
62 }
63 if (component == 2) {
64 return descriptor.g_type;
65 }
66 if (component == 3) {
67 return descriptor.b_type;
68 }
69 break;
70 case TextureFormat::A2B10G10R10:
71 case TextureFormat::A4B4G4R4:
72 case TextureFormat::A5B5G5R1:
73 case TextureFormat::A1B5G5R5:
74 if (component == 0) {
75 return descriptor.a_type;
76 }
77 if (component == 1) {
78 return descriptor.b_type;
79 }
80 if (component == 2) {
81 return descriptor.g_type;
82 }
83 if (component == 3) {
84 return descriptor.r_type;
85 }
86 break;
87 case TextureFormat::R32_B24G8:
88 if (component == 0) {
89 return descriptor.r_type;
90 }
91 if (component == 1) {
92 return descriptor.b_type;
93 }
94 if (component == 2) {
95 return descriptor.g_type;
96 }
97 break;
98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5:
100 if (component == 0) {
101 return descriptor.b_type;
102 }
103 if (component == 1) {
104 return descriptor.g_type;
105 }
106 if (component == 2) {
107 return descriptor.r_type;
108 }
109 break;
110 case TextureFormat::G8R24:
111 case TextureFormat::G24R8:
112 case TextureFormat::G8R8:
113 case TextureFormat::G4R4:
114 if (component == 0) {
115 return descriptor.g_type;
116 }
117 if (component == 1) {
118 return descriptor.r_type;
119 }
120 break;
121 }
122 UNIMPLEMENTED_MSG("texture format not implement={}", format);
123 return ComponentType::FLOAT;
124}
125
126bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
127 constexpr u8 R = 0b0001;
128 constexpr u8 G = 0b0010;
129 constexpr u8 B = 0b0100;
130 constexpr u8 A = 0b1000;
131 constexpr std::array<u8, 16> mask = {
132 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B),
133 (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
134 return std::bitset<4>{mask.at(component_mask)}.test(component);
135}
136
137u32 GetComponentSize(TextureFormat format, std::size_t component) {
138 switch (format) {
139 case TextureFormat::R32_G32_B32_A32:
140 return 32;
141 case TextureFormat::R16_G16_B16_A16:
142 return 16;
143 case TextureFormat::R32_G32_B32:
144 return component <= 2 ? 32 : 0;
145 case TextureFormat::R32_G32:
146 return component <= 1 ? 32 : 0;
147 case TextureFormat::R16_G16:
148 return component <= 1 ? 16 : 0;
149 case TextureFormat::R32:
150 return component == 0 ? 32 : 0;
151 case TextureFormat::R16:
152 return component == 0 ? 16 : 0;
153 case TextureFormat::R8:
154 return component == 0 ? 8 : 0;
155 case TextureFormat::R1:
156 return component == 0 ? 1 : 0;
157 case TextureFormat::A8R8G8B8:
158 return 8;
159 case TextureFormat::A2B10G10R10:
160 return (component == 3 || component == 2 || component == 1) ? 10 : 2;
161 case TextureFormat::A4B4G4R4:
162 return 4;
163 case TextureFormat::A5B5G5R1:
164 return (component == 0 || component == 1 || component == 2) ? 5 : 1;
165 case TextureFormat::A1B5G5R5:
166 return (component == 1 || component == 2 || component == 3) ? 5 : 1;
167 case TextureFormat::R32_B24G8:
168 if (component == 0) {
169 return 32;
170 }
171 if (component == 1) {
172 return 24;
173 }
174 if (component == 2) {
175 return 8;
176 }
177 return 0;
178 case TextureFormat::B5G6R5:
179 if (component == 0 || component == 2) {
180 return 5;
181 }
182 if (component == 1) {
183 return 6;
184 }
185 return 0;
186 case TextureFormat::B6G5R5:
187 if (component == 1 || component == 2) {
188 return 5;
189 }
190 if (component == 0) {
191 return 6;
192 }
193 return 0;
194 case TextureFormat::G8R24:
195 if (component == 0) {
196 return 8;
197 }
198 if (component == 1) {
199 return 24;
200 }
201 return 0;
202 case TextureFormat::G24R8:
203 if (component == 0) {
204 return 24;
205 }
206 if (component == 1) {
207 return 8;
208 }
208 }
209 return 0;
210 case TextureFormat::G8R8:
211 return (component == 0 || component == 1) ? 8 : 0;
212 case TextureFormat::G4R4:
213 return (component == 0 || component == 1) ? 4 : 0;
214 default:
215 UNIMPLEMENTED_MSG("texture format not implement={}", format);
216 return 0;
217 }
218}
219
220std::size_t GetImageComponentMask(TextureFormat format) {
221 constexpr u8 R = 0b0001;
222 constexpr u8 G = 0b0010;
223 constexpr u8 B = 0b0100;
224 constexpr u8 A = 0b1000;
225 switch (format) {
226 case TextureFormat::R32_G32_B32_A32:
227 case TextureFormat::R16_G16_B16_A16:
228 case TextureFormat::A8R8G8B8:
229 case TextureFormat::A2B10G10R10:
230 case TextureFormat::A4B4G4R4:
231 case TextureFormat::A5B5G5R1:
232 case TextureFormat::A1B5G5R5:
233 return std::size_t{R | G | B | A};
234 case TextureFormat::R32_G32_B32:
235 case TextureFormat::R32_B24G8:
236 case TextureFormat::B5G6R5:
237 case TextureFormat::B6G5R5:
238 return std::size_t{R | G | B};
239 case TextureFormat::R32_G32:
240 case TextureFormat::R16_G16:
241 case TextureFormat::G8R24:
242 case TextureFormat::G24R8:
243 case TextureFormat::G8R8:
244 case TextureFormat::G4R4:
245 return std::size_t{R | G};
246 case TextureFormat::R32:
247 case TextureFormat::R16:
248 case TextureFormat::R8:
249 case TextureFormat::R1:
250 return std::size_t{R};
251 default:
252 UNIMPLEMENTED_MSG("texture format not implement={}", format);
253 return std::size_t{R | G | B | A};
254 }
255}
256
23std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { 257std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
24 switch (image_type) { 258 switch (image_type) {
25 case Tegra::Shader::ImageType::Texture1D: 259 case Tegra::Shader::ImageType::Texture1D:
@@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
37} 271}
38} // Anonymous namespace 272} // Anonymous namespace
39 273
274std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
275 Node original_value) {
276 switch (component_type) {
277 case ComponentType::SNORM: {
278 // range [-1.0, 1.0]
279 auto cnv_value = Operation(OperationCode::FMul, original_value,
280 Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
281 cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
282 return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
283 }
284 case ComponentType::SINT:
285 case ComponentType::UNORM: {
286 bool is_signed = component_type == ComponentType::SINT;
287 // range [0.0, 1.0]
288 auto cnv_value = Operation(OperationCode::FMul, original_value,
289 Immediate(static_cast<float>(1 << component_size) - 1.f));
290 return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
291 is_signed};
292 }
293 case ComponentType::UINT: // range [0, (1 << component_size) - 1]
294 return {std::move(original_value), false};
295 case ComponentType::FLOAT:
296 if (component_size == 16) {
297 return {Operation(OperationCode::HCastFloat, original_value), true};
298 } else {
299 return {std::move(original_value), true};
300 }
301 default:
302 UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
303 return {std::move(original_value), true};
304 }
305}
306
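GetComponentValue undoes texture normalization so the packed integer bits can be reassembled; the FMul immediates above are the usual UNORM/SNORM scales. A standalone restatement with hypothetical helpers (bit widths under 32 assumed; the IR truncates with a cast where these round for clarity):

#include <cmath>
#include <cstdint>

std::uint32_t UnormBits(float f, unsigned bits) { // f in [0.0, 1.0]
    return static_cast<std::uint32_t>(std::round(f * ((1u << bits) - 1)));
}

std::int32_t SnormBits(float f, unsigned bits) { // f in [-1.0, 1.0]
    return static_cast<std::int32_t>(std::round(f * ((1u << bits) / 2.0f - 1.0f)));
}

// UnormBits(1.0f, 8) == 255 and SnormBits(-1.0f, 8) == -127, matching the
// (1 << size) - 1 and (1 << size) / 2 - 1 immediates in the diff.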
40u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { 307u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
41 const Instruction instr = {program_code[pc]}; 308 const Instruction instr = {program_code[pc]};
42 const auto opcode = OpCode::Decode(instr); 309 const auto opcode = OpCode::Decode(instr);
@@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
53 320
54 switch (opcode->get().GetId()) { 321 switch (opcode->get().GetId()) {
55 case OpCode::Id::SULD: { 322 case OpCode::Id::SULD: {
56 UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
57 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != 323 UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
58 Tegra::Shader::OutOfBoundsStore::Ignore); 324 Tegra::Shader::OutOfBoundsStore::Ignore);
59 325
@@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
62 : GetBindlessImage(instr.gpr39, type)}; 328 : GetBindlessImage(instr.gpr39, type)};
63 image.MarkRead(); 329 image.MarkRead();
64 330
65 u32 indexer = 0; 331 if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
66 for (u32 element = 0; element < 4; ++element) { 332 u32 indexer = 0;
67 if (!instr.suldst.IsComponentEnabled(element)) { 333 for (u32 element = 0; element < 4; ++element) {
68 continue; 334 if (!instr.suldst.IsComponentEnabled(element)) {
335 continue;
336 }
337 MetaImage meta{image, {}, element};
338 Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
339 SetTemporary(bb, indexer++, std::move(value));
340 }
341 for (u32 i = 0; i < indexer; ++i) {
342 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
343 }
344 } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
345 UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
346 instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
347
348 auto descriptor = [this, instr] {
349 std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
350 if (instr.suldst.is_immediate) {
351 descriptor =
352 registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
353 } else {
354 const Node image_register = GetRegister(instr.gpr39);
355 const auto [base_image, buffer, offset] = TrackCbuf(
356 image_register, global_code, static_cast<s64>(global_code.size()));
357 descriptor = registry.ObtainBindlessSampler(buffer, offset);
358 }
359 if (!descriptor) {
360 UNREACHABLE_MSG("Failed to obtain image descriptor");
361 }
362 return *descriptor;
363 }();
364
365 const auto comp_mask = GetImageComponentMask(descriptor.format);
366
367 switch (instr.suldst.GetStoreDataLayout()) {
368 case StoreType::Bits32:
369 case StoreType::Bits64: {
370 u32 indexer = 0;
371 u32 shifted_counter = 0;
372 Node value = Immediate(0);
373 for (u32 element = 0; element < 4; ++element) {
374 if (!IsComponentEnabled(comp_mask, element)) {
375 continue;
376 }
377 const auto component_type = GetComponentType(descriptor, element);
378 const auto component_size = GetComponentSize(descriptor.format, element);
379 MetaImage meta{image, {}, element};
380
381 auto [converted_value, is_signed] = GetComponentValue(
382 component_type, component_size,
383 Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
384
385 // shift element to correct position
386 const auto shifted = shifted_counter;
387 if (shifted > 0) {
388 converted_value =
389 SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
390 std::move(converted_value), Immediate(shifted));
391 }
392 shifted_counter += component_size;
393
394 // add value into result
395 value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
396
397 // once a full 32-bit word has been packed, save it into a temporary
398 if (shifted_counter >= 32) {
399 SetTemporary(bb, indexer++, std::move(value));
400 // reset counter and value to start packing the next word
401 value = Immediate(0);
402 shifted_counter = 0;
403 }
404 }
405 for (u32 i = 0; i < indexer; ++i) {
406 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
407 }
408 break;
409 }
410 default:
411 UNREACHABLE();
412 break;
69 } 413 }
70 MetaImage meta{image, {}, element};
71 Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
72 SetTemporary(bb, indexer++, std::move(value));
73 }
74 for (u32 i = 0; i < indexer; ++i) {
75 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
76 } 414 }
77 break; 415 break;
78 } 416 }
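The SULD.D_BA path above converts each enabled component, shifts it into position, ORs it into a 32-bit accumulator, and flushes to the next destination register whenever 32 bits have been packed. A standalone walk-through using B5G6R5, whose component sizes (5/6/5) come from GetComponentSize:

#include <cassert>
#include <cstdint>

int main() {
    const std::uint32_t comps[] = {0x1F, 0x3F, 0x1F}; // B, G, R, already converted
    const std::uint32_t sizes[] = {5, 6, 5};

    std::uint32_t value = 0;
    std::uint32_t shifted_counter = 0;
    for (int i = 0; i < 3; ++i) {
        value |= comps[i] << shifted_counter; // shift element to its position
        shifted_counter += sizes[i];
    }
    assert(shifted_counter == 16); // under 32 bits: a single register suffices
    assert(value == 0xFFFF);
}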
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 4944e9d69..d4f95b18c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -11,12 +11,17 @@
11 11
12namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
13 13
14using std::move;
14using Tegra::Shader::ConditionCode; 15using Tegra::Shader::ConditionCode;
15using Tegra::Shader::Instruction; 16using Tegra::Shader::Instruction;
17using Tegra::Shader::IpaInterpMode;
16using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::PixelImap;
17using Tegra::Shader::Register; 20using Tegra::Shader::Register;
18using Tegra::Shader::SystemVariable; 21using Tegra::Shader::SystemVariable;
19 22
23using Index = Tegra::Shader::Attribute::Index;
24
20u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { 25u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]}; 26 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr); 27 const auto opcode = OpCode::Decode(instr);
@@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
66 bb.push_back(Operation(OperationCode::Discard)); 71 bb.push_back(Operation(OperationCode::Discard));
67 break; 72 break;
68 } 73 }
69 case OpCode::Id::MOV_SYS: { 74 case OpCode::Id::S2R: {
70 const Node value = [this, instr] { 75 const Node value = [this, instr] {
71 switch (instr.sys20) { 76 switch (instr.sys20) {
72 case SystemVariable::LaneId: 77 case SystemVariable::LaneId:
73 LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); 78 LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
74 return Immediate(0U); 79 return Immediate(0U);
75 case SystemVariable::InvocationId: 80 case SystemVariable::InvocationId:
76 return Operation(OperationCode::InvocationId); 81 return Operation(OperationCode::InvocationId);
77 case SystemVariable::Ydirection: 82 case SystemVariable::Ydirection:
78 return Operation(OperationCode::YNegate); 83 return Operation(OperationCode::YNegate);
79 case SystemVariable::InvocationInfo: 84 case SystemVariable::InvocationInfo:
80 LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); 85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
86 return Immediate(0U);
87 case SystemVariable::WscaleFactorXY:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
89 return Immediate(0U);
90 case SystemVariable::WscaleFactorZ:
91 UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
81 return Immediate(0U); 92 return Immediate(0U);
82 case SystemVariable::Tid: { 93 case SystemVariable::Tid: {
83 Node value = Immediate(0); 94 Node value = Immediate(0);
@@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
213 } 224 }
214 case OpCode::Id::IPA: { 225 case OpCode::Id::IPA: {
215 const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; 226 const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
216
217 const auto attribute = instr.attribute.fmt28; 227 const auto attribute = instr.attribute.fmt28;
218 const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), 228 const Index index = attribute.index;
219 instr.ipa.sample_mode.Value()};
220 229
221 Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) 230 Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
222 : GetInputAttribute(attribute.index, attribute.element); 231 : GetInputAttribute(index, attribute.element);
223 const Tegra::Shader::Attribute::Index index = attribute.index.Value(); 232
224 const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && 233 // Code taken from Ryujinx.
225 index <= Tegra::Shader::Attribute::Index::Attribute_31; 234 if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
226 if (is_generic || is_physical) { 235 const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
227 // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. 236 if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
228 // In theory by setting them as perspective, OpenGL does the perspective correction. 237 Node position_w = GetInputAttribute(Index::Position, 3);
229 // A way must figured to reverse the last step of it. 238 value = Operation(OperationCode::FMul, move(value), move(position_w));
230 if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
231 value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
232 } 239 }
233 } 240 }
234 value = GetSaturatedFloat(value, instr.ipa.saturate);
235 241
236 SetRegister(bb, instr.gpr0, value); 242 if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
243 value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
244 }
245
246 value = GetSaturatedFloat(move(value), instr.ipa.saturate);
247
248 SetRegister(bb, instr.gpr0, move(value));
237 break; 249 break;
238 } 250 }
239 case OpCode::Id::OUT_R: { 251 case OpCode::Id::OUT_R: {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 48350e042..6c4a1358b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
780 // When lod is used, it is always in gpr20 780 // When lod is used, it is always in gpr20
781 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); 781 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
782 782
783 // Fill empty entries from the guest sampler
784 const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
785 if (type_coord_count != entry_coord_count) {
786 LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
787
788 // When the size is higher we insert zeroes
789 for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
790 coords.push_back(GetRegister(Register::ZeroIndex));
791 }
792
793 // Then we ensure the size matches the number of entries (dropping unused values)
794 coords.resize(entry_coord_count);
795 }
796
797 Node4 values; 783 Node4 values;
798 for (u32 element = 0; element < values.size(); ++element) { 784 for (u32 element = 0; element < values.size(); ++element) {
799 auto coords_copy = coords; 785 auto coords_copy = coords;
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b047cf870..64ba60ea2 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -10,16 +10,24 @@
10 10
11namespace VideoCommon::Shader { 11namespace VideoCommon::Shader {
12 12
13using std::move;
13using Tegra::Shader::Instruction; 14using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 15using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred; 16using Tegra::Shader::Pred;
16using Tegra::Shader::VideoType; 17using Tegra::Shader::VideoType;
17using Tegra::Shader::VmadShr; 18using Tegra::Shader::VmadShr;
19using Tegra::Shader::VmnmxOperation;
20using Tegra::Shader::VmnmxType;
18 21
19u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { 22u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]}; 23 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr); 24 const auto opcode = OpCode::Decode(instr);
22 25
26 if (opcode->get().GetId() == OpCode::Id::VMNMX) {
27 DecodeVMNMX(bb, instr);
28 return pc;
29 }
30
23 const Node op_a = 31 const Node op_a =
24 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, 32 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
25 instr.video.type_a, instr.video.byte_height_a); 33 instr.video.type_a, instr.video.byte_height_a);
@@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
109 } 117 }
110} 118}
111 119
120void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
121 UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
122 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
123 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
124 UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
125 UNIMPLEMENTED_IF(instr.vmnmx.sat);
126 UNIMPLEMENTED_IF(instr.generates_cc);
127
128 Node op_a = GetRegister(instr.gpr8);
129 Node op_b = GetRegister(instr.gpr20);
130 Node op_c = GetRegister(instr.gpr39);
131
132 const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
133 const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
134
135 const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
136 Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
137
138 switch (instr.vmnmx.operation) {
139 case VmnmxOperation::Mrg_16H:
140 value = BitfieldInsert(move(op_c), move(value), 16, 16);
141 break;
142 case VmnmxOperation::Mrg_16L:
143 value = BitfieldInsert(move(op_c), move(value), 0, 16);
144 break;
145 case VmnmxOperation::Mrg_8B0:
146 value = BitfieldInsert(move(op_c), move(value), 0, 8);
147 break;
148 case VmnmxOperation::Mrg_8B2:
149 value = BitfieldInsert(move(op_c), move(value), 16, 8);
150 break;
151 case VmnmxOperation::Acc:
152 value = Operation(OperationCode::IAdd, move(value), move(op_c));
153 break;
154 case VmnmxOperation::Min:
155 value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
156 break;
157 case VmnmxOperation::Max:
158 value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
159 break;
160 case VmnmxOperation::Nop:
161 break;
162 default:
163 UNREACHABLE();
164 break;
165 }
166
167 SetRegister(bb, instr.gpr0, move(value));
168}
169
112} // namespace VideoCommon::Shader 170} // namespace VideoCommon::Shader
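The VMNMX merge modes above rely on BitfieldInsert(base, insert, offset, bits), which keeps base outside the field and writes the low bits of insert at offset. A standalone restatement (hypothetical helper mirroring the IR op; bit counts under 32 assumed):

#include <cassert>
#include <cstdint>

std::uint32_t BitfieldInsert(std::uint32_t base, std::uint32_t insert,
                             unsigned offset, unsigned bits) {
    const std::uint32_t mask = ((1u << bits) - 1u) << offset;
    return (base & ~mask) | ((insert << offset) & mask);
}

int main() {
    // Mrg_16H: the min/max result replaces op_c's high half
    assert(BitfieldInsert(0xAAAABBBB, 0x1234, 16, 16) == 0x1234BBBB);
    // Mrg_8B0: the result's low byte replaces op_c's byte 0
    assert(BitfieldInsert(0xAAAABBBB, 0xCD, 0, 8) == 0xAAAABBCD);
}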
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index baf7188d2..8852c8a1b 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
359 switch (cc) { 359 switch (cc) {
360 case Tegra::Shader::ConditionCode::NEU: 360 case Tegra::Shader::ConditionCode::NEU:
361 return GetInternalFlag(InternalFlag::Zero, true); 361 return GetInternalFlag(InternalFlag::Zero, true);
362 case Tegra::Shader::ConditionCode::FCSM_TR:
363 UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
364 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
362 default: 365 default:
363 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); 366 UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
364 return MakeNode<PredicateNode>(Pred::NeverExecute, false); 367 return MakeNode<PredicateNode>(Pred::NeverExecute, false);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 80fc9b82c..c6e7bdf50 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -312,6 +312,10 @@ private:
312 /// Conditionally saturates a half float pair 312 /// Conditionally saturates a half float pair
313 Node GetSaturatedHalfFloat(Node value, bool saturate = true); 313 Node GetSaturatedHalfFloat(Node value, bool saturate = true);
314 314
315 /// Get image component value by type and size
316 std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
317 u32 component_size, Node original_value);
318
315 /// Returns a predicate comparing two floats 319 /// Returns a predicate comparing two floats
316 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); 320 Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
317 /// Returns a predicate comparing two integers 321 /// Returns a predicate comparing two integers
@@ -350,6 +354,9 @@ private:
350 /// Marks the usage of a input or output attribute. 354 /// Marks the usage of a input or output attribute.
351 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); 355 void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
352 356
357 /// Decodes VMNMX instruction and inserts its code into the passed basic block.
358 void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
359
353 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 360 void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
354 const Node4& components); 361 const Node4& components);
355 362
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index ae8817465..e0acd44d3 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -504,103 +504,6 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) {
504 return GetFormatBpp(pixel_format) / CHAR_BIT; 504 return GetFormatBpp(pixel_format) / CHAR_BIT;
505} 505}
506 506
507enum class SurfaceCompression {
508 None, // Not compressed
509 Compressed, // Texture is compressed
510 Converted, // Texture is converted before upload or after download
511 Rearranged, // Texture is swizzled before upload or after download
512};
513
514constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{
515 SurfaceCompression::None, // ABGR8U
516 SurfaceCompression::None, // ABGR8S
517 SurfaceCompression::None, // ABGR8UI
518 SurfaceCompression::None, // B5G6R5U
519 SurfaceCompression::None, // A2B10G10R10U
520 SurfaceCompression::None, // A1B5G5R5U
521 SurfaceCompression::None, // R8U
522 SurfaceCompression::None, // R8UI
523 SurfaceCompression::None, // RGBA16F
524 SurfaceCompression::None, // RGBA16U
525 SurfaceCompression::None, // RGBA16S
526 SurfaceCompression::None, // RGBA16UI
527 SurfaceCompression::None, // R11FG11FB10F
528 SurfaceCompression::None, // RGBA32UI
529 SurfaceCompression::Compressed, // DXT1
530 SurfaceCompression::Compressed, // DXT23
531 SurfaceCompression::Compressed, // DXT45
532 SurfaceCompression::Compressed, // DXN1
533 SurfaceCompression::Compressed, // DXN2UNORM
534 SurfaceCompression::Compressed, // DXN2SNORM
535 SurfaceCompression::Compressed, // BC7U
536 SurfaceCompression::Compressed, // BC6H_UF16
537 SurfaceCompression::Compressed, // BC6H_SF16
538 SurfaceCompression::Converted, // ASTC_2D_4X4
539 SurfaceCompression::None, // BGRA8
540 SurfaceCompression::None, // RGBA32F
541 SurfaceCompression::None, // RG32F
542 SurfaceCompression::None, // R32F
543 SurfaceCompression::None, // R16F
544 SurfaceCompression::None, // R16U
545 SurfaceCompression::None, // R16S
546 SurfaceCompression::None, // R16UI
547 SurfaceCompression::None, // R16I
548 SurfaceCompression::None, // RG16
549 SurfaceCompression::None, // RG16F
550 SurfaceCompression::None, // RG16UI
551 SurfaceCompression::None, // RG16I
552 SurfaceCompression::None, // RG16S
553 SurfaceCompression::None, // RGB32F
554 SurfaceCompression::None, // RGBA8_SRGB
555 SurfaceCompression::None, // RG8U
556 SurfaceCompression::None, // RG8S
557 SurfaceCompression::None, // RG32UI
558 SurfaceCompression::None, // RGBX16F
559 SurfaceCompression::None, // R32UI
560 SurfaceCompression::None, // R32I
561 SurfaceCompression::Converted, // ASTC_2D_8X8
562 SurfaceCompression::Converted, // ASTC_2D_8X5
563 SurfaceCompression::Converted, // ASTC_2D_5X4
564 SurfaceCompression::None, // BGRA8_SRGB
565 SurfaceCompression::Compressed, // DXT1_SRGB
566 SurfaceCompression::Compressed, // DXT23_SRGB
567 SurfaceCompression::Compressed, // DXT45_SRGB
568 SurfaceCompression::Compressed, // BC7U_SRGB
569 SurfaceCompression::None, // R4G4B4A4U
570 SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB
571 SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB
572 SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB
573 SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB
574 SurfaceCompression::Converted, // ASTC_2D_5X5
575 SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB
576 SurfaceCompression::Converted, // ASTC_2D_10X8
577 SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB
578 SurfaceCompression::Converted, // ASTC_2D_6X6
579 SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB
580 SurfaceCompression::Converted, // ASTC_2D_10X10
581 SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB
582 SurfaceCompression::Converted, // ASTC_2D_12X12
583 SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB
584 SurfaceCompression::Converted, // ASTC_2D_8X6
585 SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB
586 SurfaceCompression::Converted, // ASTC_2D_6X5
587 SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB
588 SurfaceCompression::None, // E5B9G9R9F
589 SurfaceCompression::None, // Z32F
590 SurfaceCompression::None, // Z16
591 SurfaceCompression::None, // Z24S8
592 SurfaceCompression::Rearranged, // S8Z24
593 SurfaceCompression::None, // Z32FS8
594}};
595
596constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) {
597 if (format == PixelFormat::Invalid) {
598 return SurfaceCompression::None;
599 }
600 DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size());
601 return compression_type_table[static_cast<std::size_t>(format)];
602}
603
604SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); 507SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type);
605 508
606bool SurfaceTargetIsLayered(SurfaceTarget target); 509bool SurfaceTargetIsLayered(SurfaceTarget target);
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 002df414f..7af0e792c 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -18,15 +18,20 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192,
18 18
19using Tegra::Texture::ConvertFromGuestToHost; 19using Tegra::Texture::ConvertFromGuestToHost;
20using VideoCore::MortonSwizzleMode; 20using VideoCore::MortonSwizzleMode;
21using VideoCore::Surface::SurfaceCompression; 21using VideoCore::Surface::IsPixelFormatASTC;
22using VideoCore::Surface::PixelFormat;
22 23
23StagingCache::StagingCache() = default; 24StagingCache::StagingCache() = default;
24 25
25StagingCache::~StagingCache() = default; 26StagingCache::~StagingCache() = default;
26 27
27SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) 28SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
28 : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, 29 bool is_astc_supported)
29 mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { 30 : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels),
31 mipmap_offsets(params.num_levels) {
32 is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported;
33 host_memory_size = params.GetHostSizeInBytes(is_converted);
34
30 std::size_t offset = 0; 35 std::size_t offset = 0;
31 for (u32 level = 0; level < params.num_levels; ++level) { 36 for (u32 level = 0; level < params.num_levels; ++level) {
32 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; 37 const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
@@ -164,7 +169,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf
164 169
165 std::size_t guest_offset{mipmap_offsets[level]}; 170 std::size_t guest_offset{mipmap_offsets[level]};
166 if (params.is_layered) { 171 if (params.is_layered) {
167 std::size_t host_offset{0}; 172 std::size_t host_offset = 0;
168 const std::size_t guest_stride = layer_size; 173 const std::size_t guest_stride = layer_size;
169 const std::size_t host_stride = params.GetHostLayerSize(level); 174 const std::size_t host_stride = params.GetHostLayerSize(level);
170 for (u32 layer = 0; layer < params.depth; ++layer) { 175 for (u32 layer = 0; layer < params.depth; ++layer) {
@@ -185,28 +190,17 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
185 MICROPROFILE_SCOPE(GPU_Load_Texture); 190 MICROPROFILE_SCOPE(GPU_Load_Texture);
186 auto& staging_buffer = staging_cache.GetBuffer(0); 191 auto& staging_buffer = staging_cache.GetBuffer(0);
187 u8* host_ptr; 192 u8* host_ptr;
188 is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); 193 // Use an extra temporary buffer
189 194 auto& tmp_buffer = staging_cache.GetBuffer(1);
190 // Handle continuouty 195 tmp_buffer.resize(guest_memory_size);
191 if (is_continuous) { 196 host_ptr = tmp_buffer.data();
192 // Use physical memory directly 197 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
193 host_ptr = memory_manager.GetPointer(gpu_addr);
194 if (!host_ptr) {
195 return;
196 }
197 } else {
198 // Use an extra temporal buffer
199 auto& tmp_buffer = staging_cache.GetBuffer(1);
200 tmp_buffer.resize(guest_memory_size);
201 host_ptr = tmp_buffer.data();
202 memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
203 }
204 198
205 if (params.is_tiled) { 199 if (params.is_tiled) {
206 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", 200 ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
207 params.block_width, static_cast<u32>(params.target)); 201 params.block_width, static_cast<u32>(params.target));
208 for (u32 level = 0; level < params.num_levels; ++level) { 202 for (u32 level = 0; level < params.num_levels; ++level) {
209 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; 203 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
210 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, 204 SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
211 staging_buffer.data() + host_offset, level); 205 staging_buffer.data() + host_offset, level);
212 } 206 }
@@ -219,7 +213,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
219 const u32 height{(params.height + block_height - 1) / block_height}; 213 const u32 height{(params.height + block_height - 1) / block_height};
220 const u32 copy_size{width * bpp}; 214 const u32 copy_size{width * bpp};
221 if (params.pitch == copy_size) { 215 if (params.pitch == copy_size) {
222 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); 216 std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false));
223 } else { 217 } else {
224 const u8* start{host_ptr}; 218 const u8* start{host_ptr};
225 u8* write_to{staging_buffer.data()}; 219 u8* write_to{staging_buffer.data()};
@@ -231,19 +225,15 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
231 } 225 }
232 } 226 }
233 227
234 auto compression_type = params.GetCompressionType(); 228 if (!is_converted && params.pixel_format != PixelFormat::S8Z24) {
235 if (compression_type == SurfaceCompression::None ||
236 compression_type == SurfaceCompression::Compressed)
237 return; 229 return;
230 }
238 231
239 for (u32 level_up = params.num_levels; level_up > 0; --level_up) { 232 for (u32 level = params.num_levels; level--;) {
240 const u32 level = level_up - 1; 233 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)};
241 const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; 234 const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)};
242 const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged 235 u8* const in_buffer = staging_buffer.data() + in_host_offset;
243 ? in_host_offset 236 u8* const out_buffer = staging_buffer.data() + out_host_offset;
244 : params.GetConvertedMipmapOffset(level);
245 u8* in_buffer = staging_buffer.data() + in_host_offset;
246 u8* out_buffer = staging_buffer.data() + out_host_offset;
247 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, 237 ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
248 params.GetMipWidth(level), params.GetMipHeight(level), 238 params.GetMipWidth(level), params.GetMipHeight(level),
249 params.GetMipDepth(level), true, true); 239 params.GetMipDepth(level), true, true);
@@ -256,24 +246,15 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
256 auto& staging_buffer = staging_cache.GetBuffer(0); 246 auto& staging_buffer = staging_cache.GetBuffer(0);
257 u8* host_ptr; 247 u8* host_ptr;
258 248
259 // Handle continuouty 249 // Use an extra temporary buffer
260 if (is_continuous) { 250 auto& tmp_buffer = staging_cache.GetBuffer(1);
261 // Use physical memory directly 251 tmp_buffer.resize(guest_memory_size);
262 host_ptr = memory_manager.GetPointer(gpu_addr); 252 host_ptr = tmp_buffer.data();
263 if (!host_ptr) {
264 return;
265 }
266 } else {
267 // Use an extra temporal buffer
268 auto& tmp_buffer = staging_cache.GetBuffer(1);
269 tmp_buffer.resize(guest_memory_size);
270 host_ptr = tmp_buffer.data();
271 }
272 253
273 if (params.is_tiled) { 254 if (params.is_tiled) {
274 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); 255 ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
275 for (u32 level = 0; level < params.num_levels; ++level) { 256 for (u32 level = 0; level < params.num_levels; ++level) {
276 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; 257 const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)};
277 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, 258 SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
278 staging_buffer.data() + host_offset, level); 259 staging_buffer.data() + host_offset, level);
279 } 260 }
@@ -299,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
299 } 280 }
300 } 281 }
301 } 282 }
302 if (!is_continuous) { 283 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
303 memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
304 }
305} 284}
306 285
307} // namespace VideoCommon 286} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 5f79bb0aa..a39a8661b 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -68,8 +68,8 @@ public:
68 return gpu_addr; 68 return gpu_addr;
69 } 69 }
70 70
71 bool Overlaps(const CacheAddr start, const CacheAddr end) const { 71 bool Overlaps(const VAddr start, const VAddr end) const {
72 return (cache_addr < end) && (cache_addr_end > start); 72 return (cpu_addr < end) && (cpu_addr_end > start);
73 } 73 }
74 74
75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { 75 bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) {
@@ -86,21 +86,13 @@ public:
86 return cpu_addr; 86 return cpu_addr;
87 } 87 }
88 88
89 void SetCpuAddr(const VAddr new_addr) { 89 VAddr GetCpuAddrEnd() const {
90 cpu_addr = new_addr; 90 return cpu_addr_end;
91 }
92
93 CacheAddr GetCacheAddr() const {
94 return cache_addr;
95 } 91 }
96 92
97 CacheAddr GetCacheAddrEnd() const { 93 void SetCpuAddr(const VAddr new_addr) {
98 return cache_addr_end; 94 cpu_addr = new_addr;
99 } 95 cpu_addr_end = new_addr + guest_memory_size;
100
101 void SetCacheAddr(const CacheAddr new_addr) {
102 cache_addr = new_addr;
103 cache_addr_end = new_addr + guest_memory_size;
104 } 96 }
105 97
106 const SurfaceParams& GetSurfaceParams() const { 98 const SurfaceParams& GetSurfaceParams() const {
@@ -119,18 +111,14 @@ public:
119 return mipmap_sizes[level]; 111 return mipmap_sizes[level];
120 } 112 }
121 113
122 void MarkAsContinuous(const bool is_continuous) {
123 this->is_continuous = is_continuous;
124 }
125
126 bool IsContinuous() const {
127 return is_continuous;
128 }
129
130 bool IsLinear() const { 114 bool IsLinear() const {
131 return !params.is_tiled; 115 return !params.is_tiled;
132 } 116 }
133 117
118 bool IsConverted() const {
119 return is_converted;
120 }
121
134 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { 122 bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const {
135 return params.pixel_format == pixel_format; 123 return params.pixel_format == pixel_format;
136 } 124 }
@@ -160,7 +148,8 @@ public:
160 } 148 }
161 149
162protected: 150protected:
163 explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); 151 explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params,
152 bool is_astc_supported);
164 ~SurfaceBaseImpl() = default; 153 ~SurfaceBaseImpl() = default;
165 154
166 virtual void DecorateSurfaceName() = 0; 155 virtual void DecorateSurfaceName() = 0;
@@ -168,12 +157,11 @@ protected:
168 const SurfaceParams params; 157 const SurfaceParams params;
169 std::size_t layer_size; 158 std::size_t layer_size;
170 std::size_t guest_memory_size; 159 std::size_t guest_memory_size;
171 const std::size_t host_memory_size; 160 std::size_t host_memory_size;
172 GPUVAddr gpu_addr{}; 161 GPUVAddr gpu_addr{};
173 CacheAddr cache_addr{};
174 CacheAddr cache_addr_end{};
175 VAddr cpu_addr{}; 162 VAddr cpu_addr{};
176 bool is_continuous{}; 163 VAddr cpu_addr_end{};
164 bool is_converted{};
177 165
178 std::vector<std::size_t> mipmap_sizes; 166 std::vector<std::size_t> mipmap_sizes;
179 std::vector<std::size_t> mipmap_offsets; 167 std::vector<std::size_t> mipmap_offsets;
@@ -288,8 +276,9 @@ public:
288 } 276 }
289 277
290protected: 278protected:
291 explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params) 279 explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params,
292 : SurfaceBaseImpl(gpu_addr, params) {} 280 bool is_astc_supported)
281 : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {}
293 282
294 ~SurfaceBase() = default; 283 ~SurfaceBase() = default;
295 284
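The Overlaps() rewrite above is the standard half-open interval test: [a_begin, a_end) and [b_begin, b_end) intersect exactly when each range begins before the other ends. A standalone check with illustrative types:

#include <cassert>
#include <cstdint>

bool Overlaps(std::uint64_t a_begin, std::uint64_t a_end,
              std::uint64_t b_begin, std::uint64_t b_end) {
    return a_begin < b_end && a_end > b_begin;
}

int main() {
    assert(Overlaps(0x1000, 0x2000, 0x1FFF, 0x3000));  // one byte shared
    assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x3000)); // adjacent only
}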
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 9931c5ef7..6f3ef45be 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
113 params.height = tic.Height(); 113 params.height = tic.Height();
114 params.depth = tic.Depth(); 114 params.depth = tic.Depth();
115 params.pitch = params.is_tiled ? 0 : tic.Pitch(); 115 params.pitch = params.is_tiled ? 0 : tic.Pitch();
116 if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { 116 if (params.target == SurfaceTarget::TextureCubemap ||
117 params.depth = 1; 117 params.target == SurfaceTarget::TextureCubeArray) {
118 } else if (params.target == SurfaceTarget::TextureCubemap ||
119 params.target == SurfaceTarget::TextureCubeArray) {
120 params.depth *= 6; 118 params.depth *= 6;
121 } 119 }
122 params.num_levels = tic.max_mip_level + 1; 120 params.num_levels = tic.max_mip_level + 1;
@@ -309,28 +307,26 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
309 return offset; 307 return offset;
310} 308}
311 309
312std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { 310std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const {
313 std::size_t offset = 0;
314 for (u32 i = 0; i < level; i++) {
315 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
316 }
317 return offset;
318}
319
320std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
321 std::size_t offset = 0; 311 std::size_t offset = 0;
322 for (u32 i = 0; i < level; i++) { 312 if (is_converted) {
323 offset += GetConvertedMipmapSize(i); 313 for (u32 i = 0; i < level; ++i) {
314 offset += GetConvertedMipmapSize(i) * GetNumLayers();
315 }
316 } else {
317 for (u32 i = 0; i < level; ++i) {
318 offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers();
319 }
324 } 320 }
325 return offset; 321 return offset;
326} 322}
327 323
328std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { 324std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
329 constexpr std::size_t rgba8_bpp = 4ULL; 325 constexpr std::size_t rgba8_bpp = 4ULL;
330 const std::size_t width_t = GetMipWidth(level); 326 const std::size_t mip_width = GetMipWidth(level);
331 const std::size_t height_t = GetMipHeight(level); 327 const std::size_t mip_height = GetMipHeight(level);
332 const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); 328 const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level);
333 return width_t * height_t * depth_t * rgba8_bpp; 329 return mip_width * mip_height * mip_depth * rgba8_bpp;
334} 330}
335 331
336std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { 332std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
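
For reference, a minimal sketch of the merged offset walk in GetHostMipmapLevelOffset above; the level_size callback stands in for GetConvertedMipmapSize or GetInnerMipmapMemorySize, and the free function itself is illustrative rather than part of the commit:

#include <cstddef>

// The host offset of a mip level is the sum of all lower levels, counted
// once per layer; only the per-level size differs between the two paths.
template <typename LevelSize>
std::size_t HostMipmapOffset(unsigned level, std::size_t num_layers, LevelSize level_size) {
    std::size_t offset = 0;
    for (unsigned i = 0; i < level; ++i) {
        offset += level_size(i) * num_layers;
    }
    return offset;
}
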
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 995cc3818..24957df8d 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -20,8 +20,6 @@ namespace VideoCommon {
20 20
21class FormatLookupTable; 21class FormatLookupTable;
22 22
23using VideoCore::Surface::SurfaceCompression;
24
25class SurfaceParams { 23class SurfaceParams {
26public: 24public:
27 /// Creates SurfaceParams from a texture configuration. 25 /// Creates SurfaceParams from a texture configuration.
@@ -67,16 +65,14 @@ public:
67 return GetInnerMemorySize(false, false, false); 65 return GetInnerMemorySize(false, false, false);
68 } 66 }
69 67
70 std::size_t GetHostSizeInBytes() const { 68 std::size_t GetHostSizeInBytes(bool is_converted) const {
71 std::size_t host_size_in_bytes; 69 if (!is_converted) {
72 if (GetCompressionType() == SurfaceCompression::Converted) { 70 return GetInnerMemorySize(true, false, false);
73 // ASTC is decompressed in software and emulated as RGBA8 71 }
74 host_size_in_bytes = 0; 72 // ASTC is decompressed in software and emulated as RGBA8
75 for (u32 level = 0; level < num_levels; ++level) { 73 std::size_t host_size_in_bytes = 0;
76 host_size_in_bytes += GetConvertedMipmapSize(level); 74 for (u32 level = 0; level < num_levels; ++level) {
77 } 75 host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers();
78 } else {
79 host_size_in_bytes = GetInnerMemorySize(true, false, false);
80 } 76 }
81 return host_size_in_bytes; 77 return host_size_in_bytes;
82 } 78 }
@@ -107,9 +103,8 @@ public:
107 u32 GetMipBlockDepth(u32 level) const; 103 u32 GetMipBlockDepth(u32 level) const;
108 104
109 /// Returns the best possible row/pitch alignment for the surface. 105 /// Returns the best possible row/pitch alignment for the surface.
110 u32 GetRowAlignment(u32 level) const { 106 u32 GetRowAlignment(u32 level, bool is_converted) const {
111 const u32 bpp = 107 const u32 bpp = is_converted ? 4 : GetBytesPerPixel();
112 GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel();
113 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); 108 return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp);
114 } 109 }
115 110
@@ -117,11 +112,7 @@ public:
117 std::size_t GetGuestMipmapLevelOffset(u32 level) const; 112 std::size_t GetGuestMipmapLevelOffset(u32 level) const;
118 113
119 /// Returns the offset in bytes in host memory (linear) of a given mipmap level. 114 /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
120 std::size_t GetHostMipmapLevelOffset(u32 level) const; 115 std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const;
121
122 /// Returns the offset in bytes in host memory (linear) of a given mipmap level
123 /// for a texture that is converted in host gpu.
124 std::size_t GetConvertedMipmapOffset(u32 level) const;
125 116
126 /// Returns the size in bytes in guest memory of a given mipmap level. 117 /// Returns the size in bytes in guest memory of a given mipmap level.
127 std::size_t GetGuestMipmapSize(u32 level) const { 118 std::size_t GetGuestMipmapSize(u32 level) const {
@@ -196,11 +187,6 @@ public:
196 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; 187 pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;
197 } 188 }
198 189
199 /// Returns how the compression should be handled for this texture.
200 SurfaceCompression GetCompressionType() const {
201 return VideoCore::Surface::GetFormatCompressionType(pixel_format);
202 }
203
204 /// Returns whether the surface is a TextureBuffer type of surface. 190 /// Returns whether the surface is a TextureBuffer type of surface.
205 bool IsBuffer() const { 191 bool IsBuffer() const {
206 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; 192 return target == VideoCore::Surface::SurfaceTarget::TextureBuffer;
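
The simplified GetRowAlignment above still computes the largest power of two that divides the row size in bytes. A self-contained sketch of that bit trick (RowAlignment is an illustrative stand-in for the Common::CountTrailingZeroes32 form):

#include <cstdint>

// Isolating the lowest set bit of the row size yields the same value as
// 1U << CountTrailingZeroes32(row_size): the best possible row alignment.
constexpr std::uint32_t RowAlignment(std::uint32_t mip_width, std::uint32_t bpp) {
    const std::uint32_t row_size = mip_width * bpp;
    return row_size & ~(row_size - 1);
}

static_assert(RowAlignment(12, 4) == 16); // 48-byte rows are 16-byte alignable
static_assert(RowAlignment(33, 1) == 1);  // odd row sizes allow no alignment
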
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6cdbe63d0..4edd4313b 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
52 52
53template <typename TSurface, typename TView> 53template <typename TSurface, typename TView>
54class TextureCache { 54class TextureCache {
55 using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>;
56 using IntervalType = typename IntervalMap::interval_type;
57 55
58public: 56public:
59 void InvalidateRegion(CacheAddr addr, std::size_t size) { 57 void InvalidateRegion(VAddr addr, std::size_t size) {
60 std::lock_guard lock{mutex}; 58 std::lock_guard lock{mutex};
61 59
62 for (const auto& surface : GetSurfacesInRegion(addr, size)) { 60 for (const auto& surface : GetSurfacesInRegion(addr, size)) {
@@ -76,7 +74,7 @@ public:
76 guard_samplers = new_guard; 74 guard_samplers = new_guard;
77 } 75 }
78 76
79 void FlushRegion(CacheAddr addr, std::size_t size) { 77 void FlushRegion(VAddr addr, std::size_t size) {
80 std::lock_guard lock{mutex}; 78 std::lock_guard lock{mutex};
81 79
82 auto surfaces = GetSurfacesInRegion(addr, size); 80 auto surfaces = GetSurfacesInRegion(addr, size);
@@ -99,9 +97,9 @@ public:
99 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 97 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
100 } 98 }
101 99
102 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 100 const std::optional<VAddr> cpu_addr =
103 const auto cache_addr{ToCacheAddr(host_ptr)}; 101 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
104 if (!cache_addr) { 102 if (!cpu_addr) {
105 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 103 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
106 } 104 }
107 105
@@ -110,7 +108,7 @@ public:
110 } 108 }
111 109
112 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; 110 const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
113 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 111 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
114 if (guard_samplers) { 112 if (guard_samplers) {
115 sampled_textures.push_back(surface); 113 sampled_textures.push_back(surface);
116 } 114 }
@@ -124,13 +122,13 @@ public:
124 if (!gpu_addr) { 122 if (!gpu_addr) {
125 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 123 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
126 } 124 }
127 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 125 const std::optional<VAddr> cpu_addr =
128 const auto cache_addr{ToCacheAddr(host_ptr)}; 126 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
129 if (!cache_addr) { 127 if (!cpu_addr) {
130 return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); 128 return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
131 } 129 }
132 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; 130 const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
133 const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); 131 const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false);
134 if (guard_samplers) { 132 if (guard_samplers) {
135 sampled_textures.push_back(surface); 133 sampled_textures.push_back(surface);
136 } 134 }
@@ -145,7 +143,7 @@ public:
145 return any_rt; 143 return any_rt;
146 } 144 }
147 145
148 TView GetDepthBufferSurface(bool preserve_contents) { 146 TView GetDepthBufferSurface() {
149 std::lock_guard lock{mutex}; 147 std::lock_guard lock{mutex};
150 auto& maxwell3d = system.GPU().Maxwell3D(); 148 auto& maxwell3d = system.GPU().Maxwell3D();
151 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { 149 if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
@@ -159,14 +157,14 @@ public:
159 SetEmptyDepthBuffer(); 157 SetEmptyDepthBuffer();
160 return {}; 158 return {};
161 } 159 }
162 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 160 const std::optional<VAddr> cpu_addr =
163 const auto cache_addr{ToCacheAddr(host_ptr)}; 161 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
164 if (!cache_addr) { 162 if (!cpu_addr) {
165 SetEmptyDepthBuffer(); 163 SetEmptyDepthBuffer();
166 return {}; 164 return {};
167 } 165 }
168 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; 166 const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
169 auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); 167 auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true);
170 if (depth_buffer.target) 168 if (depth_buffer.target)
171 depth_buffer.target->MarkAsRenderTarget(false, NO_RT); 169 depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
172 depth_buffer.target = surface_view.first; 170 depth_buffer.target = surface_view.first;
@@ -176,7 +174,7 @@ public:
176 return surface_view.second; 174 return surface_view.second;
177 } 175 }
178 176
179 TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { 177 TView GetColorBufferSurface(std::size_t index) {
180 std::lock_guard lock{mutex}; 178 std::lock_guard lock{mutex};
181 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); 179 ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
182 auto& maxwell3d = system.GPU().Maxwell3D(); 180 auto& maxwell3d = system.GPU().Maxwell3D();
@@ -199,16 +197,15 @@ public:
199 return {}; 197 return {};
200 } 198 }
201 199
202 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 200 const std::optional<VAddr> cpu_addr =
203 const auto cache_addr{ToCacheAddr(host_ptr)}; 201 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
204 if (!cache_addr) { 202 if (!cpu_addr) {
205 SetEmptyColorBuffer(index); 203 SetEmptyColorBuffer(index);
206 return {}; 204 return {};
207 } 205 }
208 206
209 auto surface_view = 207 auto surface_view = GetSurface(gpu_addr, *cpu_addr,
210 GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), 208 SurfaceParams::CreateForFramebuffer(system, index), true);
211 preserve_contents, true);
212 if (render_targets[index].target) 209 if (render_targets[index].target)
213 render_targets[index].target->MarkAsRenderTarget(false, NO_RT); 210 render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
214 render_targets[index].target = surface_view.first; 211 render_targets[index].target = surface_view.first;
@@ -257,27 +254,26 @@ public:
257 const GPUVAddr src_gpu_addr = src_config.Address(); 254 const GPUVAddr src_gpu_addr = src_config.Address();
258 const GPUVAddr dst_gpu_addr = dst_config.Address(); 255 const GPUVAddr dst_gpu_addr = dst_config.Address();
259 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); 256 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
260 const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; 257 const std::optional<VAddr> dst_cpu_addr =
261 const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; 258 system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
262 const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; 259 const std::optional<VAddr> src_cpu_addr =
263 const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; 260 system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
264 std::pair<TSurface, TView> dst_surface = 261 std::pair<TSurface, TView> dst_surface =
265 GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); 262 GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false);
266 std::pair<TSurface, TView> src_surface = 263 std::pair<TSurface, TView> src_surface =
267 GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); 264 GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false);
268 ImageBlit(src_surface.second, dst_surface.second, copy_config); 265 ImageBlit(src_surface.second, dst_surface.second, copy_config);
269 dst_surface.first->MarkAsModified(true, Tick()); 266 dst_surface.first->MarkAsModified(true, Tick());
270 } 267 }
271 268
272 TSurface TryFindFramebufferSurface(const u8* host_ptr) { 269 TSurface TryFindFramebufferSurface(VAddr addr) {
273 const CacheAddr cache_addr = ToCacheAddr(host_ptr); 270 if (!addr) {
274 if (!cache_addr) {
275 return nullptr; 271 return nullptr;
276 } 272 }
277 const CacheAddr page = cache_addr >> registry_page_bits; 273 const VAddr page = addr >> registry_page_bits;
278 std::vector<TSurface>& list = registry[page]; 274 std::vector<TSurface>& list = registry[page];
279 for (auto& surface : list) { 275 for (auto& surface : list) {
280 if (surface->GetCacheAddr() == cache_addr) { 276 if (surface->GetCpuAddr() == addr) {
281 return surface; 277 return surface;
282 } 278 }
283 } 279 }
@@ -289,8 +285,9 @@ public:
289 } 285 }
290 286
291protected: 287protected:
292 TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) 288 explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
293 : system{system}, rasterizer{rasterizer} { 289 bool is_astc_supported)
290 : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
294 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { 291 for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
295 SetEmptyColorBuffer(i); 292 SetEmptyColorBuffer(i);
296 } 293 }
@@ -337,18 +334,14 @@ protected:
337 334
338 void Register(TSurface surface) { 335 void Register(TSurface surface) {
339 const GPUVAddr gpu_addr = surface->GetGpuAddr(); 336 const GPUVAddr gpu_addr = surface->GetGpuAddr();
340 const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
341 const std::size_t size = surface->GetSizeInBytes(); 337 const std::size_t size = surface->GetSizeInBytes();
342 const std::optional<VAddr> cpu_addr = 338 const std::optional<VAddr> cpu_addr =
343 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); 339 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
344 if (!cache_ptr || !cpu_addr) { 340 if (!cpu_addr) {
345 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", 341 LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
346 gpu_addr); 342 gpu_addr);
347 return; 343 return;
348 } 344 }
349 const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
350 surface->MarkAsContinuous(continuous);
351 surface->SetCacheAddr(cache_ptr);
352 surface->SetCpuAddr(*cpu_addr); 345 surface->SetCpuAddr(*cpu_addr);
353 RegisterInnerCache(surface); 346 RegisterInnerCache(surface);
354 surface->MarkAsRegistered(true); 347 surface->MarkAsRegistered(true);
@@ -381,6 +374,7 @@ protected:
381 } 374 }
382 375
383 Core::System& system; 376 Core::System& system;
377 const bool is_astc_supported;
384 378
385private: 379private:
386 enum class RecycleStrategy : u32 { 380 enum class RecycleStrategy : u32 {
@@ -456,22 +450,18 @@ private:
456 * @param overlaps The overlapping surfaces registered in the cache. 450 * @param overlaps The overlapping surfaces registered in the cache.
457 * @param params The parameters for the new surface. 451 * @param params The parameters for the new surface.
458 * @param gpu_addr The starting address of the new surface. 452 * @param gpu_addr The starting address of the new surface.
459 * @param preserve_contents Indicates that the new surface should be loaded from memory or left
460 * blank.
461 * @param untopological Indicates to the recycler that the texture has no way to match the 453 * @param untopological Indicates to the recycler that the texture has no way to match the
462 * overlaps due to topological reasons. 454 * overlaps due to topological reasons.
463 **/ 455 **/
464 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 456 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
465 const SurfaceParams& params, const GPUVAddr gpu_addr, 457 const SurfaceParams& params, const GPUVAddr gpu_addr,
466 const bool preserve_contents,
467 const MatchTopologyResult untopological) { 458 const MatchTopologyResult untopological) {
468 const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
469 for (auto& surface : overlaps) { 459 for (auto& surface : overlaps) {
470 Unregister(surface); 460 Unregister(surface);
471 } 461 }
472 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { 462 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
473 case RecycleStrategy::Ignore: { 463 case RecycleStrategy::Ignore: {
474 return InitializeSurface(gpu_addr, params, do_load); 464 return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
475 } 465 }
476 case RecycleStrategy::Flush: { 466 case RecycleStrategy::Flush: {
477 std::sort(overlaps.begin(), overlaps.end(), 467 std::sort(overlaps.begin(), overlaps.end(),
@@ -481,7 +471,7 @@ private:
481 for (auto& surface : overlaps) { 471 for (auto& surface : overlaps) {
482 FlushSurface(surface); 472 FlushSurface(surface);
483 } 473 }
484 return InitializeSurface(gpu_addr, params, preserve_contents); 474 return InitializeSurface(gpu_addr, params);
485 } 475 }
486 case RecycleStrategy::BufferCopy: { 476 case RecycleStrategy::BufferCopy: {
487 auto new_surface = GetUncachedSurface(gpu_addr, params); 477 auto new_surface = GetUncachedSurface(gpu_addr, params);
@@ -490,7 +480,7 @@ private:
490 } 480 }
491 default: { 481 default: {
492 UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); 482 UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
493 return InitializeSurface(gpu_addr, params, do_load); 483 return InitializeSurface(gpu_addr, params);
494 } 484 }
495 } 485 }
496 } 486 }
@@ -519,7 +509,9 @@ private:
519 } 509 }
520 const auto& final_params = new_surface->GetSurfaceParams(); 510 const auto& final_params = new_surface->GetSurfaceParams();
521 if (cr_params.type != final_params.type) { 511 if (cr_params.type != final_params.type) {
522 BufferCopy(current_surface, new_surface); 512 if (Settings::values.use_accurate_gpu_emulation) {
513 BufferCopy(current_surface, new_surface);
514 }
523 } else { 515 } else {
524 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); 516 std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
525 for (auto& brick : bricks) { 517 for (auto& brick : bricks) {
@@ -626,14 +618,11 @@ private:
626 * @param params The parameters on the new surface. 618 * @param params The parameters on the new surface.
627 * @param gpu_addr The starting address of the new surface. 619 * @param gpu_addr The starting address of the new surface.
628 * @param cache_addr The starting address of the new surface on physical memory. 620 * @param cpu_addr The starting address of the new surface in guest virtual memory.
629 * @param preserve_contents Indicates that the new surface should be loaded from memory or
630 * left blank.
631 */ 621 */
632 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 622 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
633 const SurfaceParams& params, 623 const SurfaceParams& params,
634 const GPUVAddr gpu_addr, 624 const GPUVAddr gpu_addr,
635 const CacheAddr cache_addr, 625 const VAddr cpu_addr) {
636 bool preserve_contents) {
637 if (params.target == SurfaceTarget::Texture3D) { 626 if (params.target == SurfaceTarget::Texture3D) {
638 bool failed = false; 627 bool failed = false;
639 if (params.num_levels > 1) { 628 if (params.num_levels > 1) {
@@ -657,7 +646,7 @@ private:
657 failed = true; 646 failed = true;
658 break; 647 break;
659 } 648 }
660 const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); 649 const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr);
661 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); 650 const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
662 modified |= surface->IsModified(); 651 modified |= surface->IsModified();
663 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, 652 const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
@@ -677,23 +666,23 @@ private:
677 } else { 666 } else {
678 for (const auto& surface : overlaps) { 667 for (const auto& surface : overlaps) {
679 if (!surface->MatchTarget(params.target)) { 668 if (!surface->MatchTarget(params.target)) {
680 if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { 669 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
681 if (Settings::values.use_accurate_gpu_emulation) { 670 if (Settings::values.use_accurate_gpu_emulation) {
682 return std::nullopt; 671 return std::nullopt;
683 } 672 }
684 Unregister(surface); 673 Unregister(surface);
685 return InitializeSurface(gpu_addr, params, preserve_contents); 674 return InitializeSurface(gpu_addr, params);
686 } 675 }
687 return std::nullopt; 676 return std::nullopt;
688 } 677 }
689 if (surface->GetCacheAddr() != cache_addr) { 678 if (surface->GetCpuAddr() != cpu_addr) {
690 continue; 679 continue;
691 } 680 }
692 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { 681 if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) {
693 return {{surface, surface->GetMainView()}}; 682 return {{surface, surface->GetMainView()}};
694 } 683 }
695 } 684 }
696 return InitializeSurface(gpu_addr, params, preserve_contents); 685 return InitializeSurface(gpu_addr, params);
697 } 686 }
698 } 687 }
699 688
@@ -716,23 +705,19 @@ private:
716 * 705 *
717 * @param gpu_addr The starting address of the candidate surface. 706 * @param gpu_addr The starting address of the candidate surface.
718 * @param params The parameters on the candidate surface. 707 * @param params The parameters on the candidate surface.
719 * @param preserve_contents Indicates that the new surface should be loaded from memory or
720 * left blank.
721 * @param is_render Whether or not the surface is a render target. 708 * @param is_render Whether or not the surface is a render target.
722 **/ 709 **/
723 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, 710 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr,
724 const SurfaceParams& params, bool preserve_contents, 711 const SurfaceParams& params, bool is_render) {
725 bool is_render) {
726 // Step 1 712 // Step 1
727 // Check Level 1 Cache for a fast structural match. If the candidate surface 713 // Check Level 1 Cache for a fast structural match. If the candidate surface
728 // matches at a certain level, we are pretty much done. 714 // matches at a certain level, we are pretty much done.
729 if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { 715 if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) {
730 TSurface& current_surface = iter->second; 716 TSurface& current_surface = iter->second;
731 const auto topological_result = current_surface->MatchesTopology(params); 717 const auto topological_result = current_surface->MatchesTopology(params);
732 if (topological_result != MatchTopologyResult::FullMatch) { 718 if (topological_result != MatchTopologyResult::FullMatch) {
733 std::vector<TSurface> overlaps{current_surface}; 719 std::vector<TSurface> overlaps{current_surface};
734 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 720 return RecycleSurface(overlaps, params, gpu_addr, topological_result);
735 topological_result);
736 } 721 }
737 722
738 const auto struct_result = current_surface->MatchesStructure(params); 723 const auto struct_result = current_surface->MatchesStructure(params);
@@ -753,11 +738,11 @@ private:
753 // Step 2 738 // Step 2
754 // Obtain all possible overlaps in the memory region 739 // Obtain all possible overlaps in the memory region
755 const std::size_t candidate_size = params.GetGuestSizeInBytes(); 740 const std::size_t candidate_size = params.GetGuestSizeInBytes();
756 auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; 741 auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)};
757 742
758 // If none are found, we are done; we just create the surface and load it. 743 // If none are found, we are done; we just create the surface and load it.
759 if (overlaps.empty()) { 744 if (overlaps.empty()) {
760 return InitializeSurface(gpu_addr, params, preserve_contents); 745 return InitializeSurface(gpu_addr, params);
761 } 746 }
762 747
763 // Step 3 748 // Step 3
@@ -767,15 +752,13 @@ private:
767 for (const auto& surface : overlaps) { 752 for (const auto& surface : overlaps) {
768 const auto topological_result = surface->MatchesTopology(params); 753 const auto topological_result = surface->MatchesTopology(params);
769 if (topological_result != MatchTopologyResult::FullMatch) { 754 if (topological_result != MatchTopologyResult::FullMatch) {
770 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 755 return RecycleSurface(overlaps, params, gpu_addr, topological_result);
771 topological_result);
772 } 756 }
773 } 757 }
774 758
775 // Check if it's a 3D texture 759 // Check if it's a 3D texture
776 if (params.block_depth > 0) { 760 if (params.block_depth > 0) {
777 auto surface = 761 auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr);
778 Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
779 if (surface) { 762 if (surface) {
780 return *surface; 763 return *surface;
781 } 764 }
@@ -795,8 +778,7 @@ private:
795 return *view; 778 return *view;
796 } 779 }
797 } 780 }
798 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 781 return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
799 MatchTopologyResult::FullMatch);
800 } 782 }
801 // Now we check if the candidate is a mipmap/layer of the overlap 783 // Now we check if the candidate is a mipmap/layer of the overlap
802 std::optional<TView> view = 784 std::optional<TView> view =
@@ -820,7 +802,7 @@ private:
820 pair.first->EmplaceView(params, gpu_addr, candidate_size); 802 pair.first->EmplaceView(params, gpu_addr, candidate_size);
821 if (mirage_view) 803 if (mirage_view)
822 return {pair.first, *mirage_view}; 804 return {pair.first, *mirage_view};
823 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 805 return RecycleSurface(overlaps, params, gpu_addr,
824 MatchTopologyResult::FullMatch); 806 MatchTopologyResult::FullMatch);
825 } 807 }
826 return {current_surface, *view}; 808 return {current_surface, *view};
@@ -836,8 +818,7 @@ private:
836 } 818 }
837 } 819 }
838 // We failed all the tests; recycle the overlaps into a new texture. 820 // We failed all the tests; recycle the overlaps into a new texture.
839 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 821 return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch);
840 MatchTopologyResult::FullMatch);
841 } 822 }
842 823
843 /** 824 /**
@@ -850,16 +831,16 @@ private:
850 * @param params The parameters on the candidate surface. 831 * @param params The parameters on the candidate surface.
851 **/ 832 **/
852 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { 833 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
853 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 834 const std::optional<VAddr> cpu_addr =
854 const auto cache_addr{ToCacheAddr(host_ptr)}; 835 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
855 836
856 if (!cache_addr) { 837 if (!cpu_addr) {
857 Deduction result{}; 838 Deduction result{};
858 result.type = DeductionType::DeductionFailed; 839 result.type = DeductionType::DeductionFailed;
859 return result; 840 return result;
860 } 841 }
861 842
862 if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { 843 if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) {
863 TSurface& current_surface = iter->second; 844 TSurface& current_surface = iter->second;
864 const auto topological_result = current_surface->MatchesTopology(params); 845 const auto topological_result = current_surface->MatchesTopology(params);
865 if (topological_result != MatchTopologyResult::FullMatch) { 846 if (topological_result != MatchTopologyResult::FullMatch) {
@@ -878,7 +859,7 @@ private:
878 } 859 }
879 860
880 const std::size_t candidate_size = params.GetGuestSizeInBytes(); 861 const std::size_t candidate_size = params.GetGuestSizeInBytes();
881 auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; 862 auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)};
882 863
883 if (overlaps.empty()) { 864 if (overlaps.empty()) {
884 Deduction result{}; 865 Deduction result{};
@@ -995,10 +976,10 @@ private:
995 } 976 }
996 977
997 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, 978 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
998 bool preserve_contents) { 979 bool do_load = true) {
999 auto new_surface{GetUncachedSurface(gpu_addr, params)}; 980 auto new_surface{GetUncachedSurface(gpu_addr, params)};
1000 Register(new_surface); 981 Register(new_surface);
1001 if (preserve_contents) { 982 if (do_load) {
1002 LoadSurface(new_surface); 983 LoadSurface(new_surface);
1003 } 984 }
1004 return {new_surface, new_surface->GetMainView()}; 985 return {new_surface, new_surface->GetMainView()};
@@ -1022,10 +1003,10 @@ private:
1022 } 1003 }
1023 1004
1024 void RegisterInnerCache(TSurface& surface) { 1005 void RegisterInnerCache(TSurface& surface) {
1025 const CacheAddr cache_addr = surface->GetCacheAddr(); 1006 const VAddr cpu_addr = surface->GetCpuAddr();
1026 CacheAddr start = cache_addr >> registry_page_bits; 1007 VAddr start = cpu_addr >> registry_page_bits;
1027 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; 1008 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1028 l1_cache[cache_addr] = surface; 1009 l1_cache[cpu_addr] = surface;
1029 while (start <= end) { 1010 while (start <= end) {
1030 registry[start].push_back(surface); 1011 registry[start].push_back(surface);
1031 start++; 1012 start++;
@@ -1033,10 +1014,10 @@ private:
1033 } 1014 }
1034 1015
1035 void UnregisterInnerCache(TSurface& surface) { 1016 void UnregisterInnerCache(TSurface& surface) {
1036 const CacheAddr cache_addr = surface->GetCacheAddr(); 1017 const VAddr cpu_addr = surface->GetCpuAddr();
1037 CacheAddr start = cache_addr >> registry_page_bits; 1018 VAddr start = cpu_addr >> registry_page_bits;
1038 const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; 1019 const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits;
1039 l1_cache.erase(cache_addr); 1020 l1_cache.erase(cpu_addr);
1040 while (start <= end) { 1021 while (start <= end) {
1041 auto& reg{registry[start]}; 1022 auto& reg{registry[start]};
1042 reg.erase(std::find(reg.begin(), reg.end(), surface)); 1023 reg.erase(std::find(reg.begin(), reg.end(), surface));
@@ -1044,18 +1025,18 @@ private:
1044 } 1025 }
1045 } 1026 }
1046 1027
1047 std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { 1028 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1048 if (size == 0) { 1029 if (size == 0) {
1049 return {}; 1030 return {};
1050 } 1031 }
1051 const CacheAddr cache_addr_end = cache_addr + size; 1032 const VAddr cpu_addr_end = cpu_addr + size;
1052 CacheAddr start = cache_addr >> registry_page_bits; 1033 VAddr start = cpu_addr >> registry_page_bits;
1053 const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; 1034 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1054 std::vector<TSurface> surfaces; 1035 std::vector<TSurface> surfaces;
1055 while (start <= end) { 1036 while (start <= end) {
1056 std::vector<TSurface>& list = registry[start]; 1037 std::vector<TSurface>& list = registry[start];
1057 for (auto& surface : list) { 1038 for (auto& surface : list) {
1058 if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { 1039 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) {
1059 surface->MarkAsPicked(true); 1040 surface->MarkAsPicked(true);
1060 surfaces.push_back(surface); 1041 surfaces.push_back(surface);
1061 } 1042 }
@@ -1144,14 +1125,14 @@ private:
1144 // large in size. 1125 // large in size.
1145 static constexpr u64 registry_page_bits{20}; 1126 static constexpr u64 registry_page_bits{20};
1146 static constexpr u64 registry_page_size{1 << registry_page_bits}; 1127 static constexpr u64 registry_page_size{1 << registry_page_bits};
1147 std::unordered_map<CacheAddr, std::vector<TSurface>> registry; 1128 std::unordered_map<VAddr, std::vector<TSurface>> registry;
1148 1129
1149 static constexpr u32 DEPTH_RT = 8; 1130 static constexpr u32 DEPTH_RT = 8;
1150 static constexpr u32 NO_RT = 0xFFFFFFFF; 1131 static constexpr u32 NO_RT = 0xFFFFFFFF;
1151 1132
1152 // The L1 Cache is used for fast texture lookup before checking the overlaps. 1133 // The L1 Cache is used for fast texture lookup before checking the overlaps.
1153 // This avoids calculating sizes and other properties. 1134 // This avoids calculating sizes and other properties.
1154 std::unordered_map<CacheAddr, TSurface> l1_cache; 1135 std::unordered_map<VAddr, TSurface> l1_cache;
1155 1136
1156 /// The surface reserve is a "backup" cache; this is where we put unique surfaces that have 1137 /// The surface reserve is a "backup" cache; this is where we put unique surfaces that have
1157 /// previously been used. This is to prevent surfaces from being constantly created and 1138 /// previously been used. This is to prevent surfaces from being constantly created and
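
The registry above buckets surfaces by 1 MiB pages of the guest virtual address space, so region operations only visit buckets that the range [addr, addr + size) actually touches. A condensed sketch with simplified types (Registry and the integer surface handle are illustrative):

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

using VAddr = std::uint64_t;
constexpr std::uint64_t page_bits = 20; // registry_page_bits: 1 MiB pages

struct Registry {
    // Every page a surface spans holds a handle to it, mirroring RegisterInnerCache.
    std::unordered_map<VAddr, std::vector<int>> pages;

    void Register(int surface, VAddr addr, std::size_t size) {
        const VAddr end = (addr + size - 1) >> page_bits;
        for (VAddr page = addr >> page_bits; page <= end; ++page) {
            pages[page].push_back(surface);
        }
    }
};
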
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index 062b4f252..365bde2f1 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -20,6 +20,8 @@
20#include <cstring> 20#include <cstring>
21#include <vector> 21#include <vector>
22 22
23#include <boost/container/static_vector.hpp>
24
23#include "common/common_types.h" 25#include "common/common_types.h"
24 26
25#include "video_core/textures/astc.h" 27#include "video_core/textures/astc.h"
@@ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) {
39 41
40class InputBitStream { 42class InputBitStream {
41public: 43public:
42 explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) 44 constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0)
43 : m_CurByte(ptr), m_NextBit(start_offset % 8) {} 45 : cur_byte{ptr}, next_bit{start_offset % 8} {}
44 46
45 std::size_t GetBitsRead() const { 47 constexpr std::size_t GetBitsRead() const {
46 return m_BitsRead; 48 return bits_read;
47 } 49 }
48 50
49 u32 ReadBit() { 51 constexpr bool ReadBit() {
50 u32 bit = *m_CurByte >> m_NextBit++; 52 const bool bit = (*cur_byte >> next_bit++) & 1;
51 while (m_NextBit >= 8) { 53 while (next_bit >= 8) {
52 m_NextBit -= 8; 54 next_bit -= 8;
53 m_CurByte++; 55 cur_byte++;
54 } 56 }
55 57
56 m_BitsRead++; 58 bits_read++;
57 return bit & 1; 59 return bit;
58 } 60 }
59 61
60 u32 ReadBits(std::size_t nBits) { 62 constexpr u32 ReadBits(std::size_t nBits) {
61 u32 ret = 0; 63 u32 ret = 0;
62 for (std::size_t i = 0; i < nBits; ++i) { 64 for (std::size_t i = 0; i < nBits; ++i) {
63 ret |= (ReadBit() & 1) << i; 65 ret |= (ReadBit() & 1) << i;
@@ -66,7 +68,7 @@ public:
66 } 68 }
67 69
68 template <std::size_t nBits> 70 template <std::size_t nBits>
69 u32 ReadBits() { 71 constexpr u32 ReadBits() {
70 u32 ret = 0; 72 u32 ret = 0;
71 for (std::size_t i = 0; i < nBits; ++i) { 73 for (std::size_t i = 0; i < nBits; ++i) {
72 ret |= (ReadBit() & 1) << i; 74 ret |= (ReadBit() & 1) << i;
@@ -75,64 +77,58 @@ public:
75 } 77 }
76 78
77private: 79private:
78 const u8* m_CurByte; 80 const u8* cur_byte;
79 std::size_t m_NextBit = 0; 81 std::size_t next_bit = 0;
80 std::size_t m_BitsRead = 0; 82 std::size_t bits_read = 0;
81}; 83};
82 84
83class OutputBitStream { 85class OutputBitStream {
84public: 86public:
85 explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) 87 constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0)
86 : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} 88 : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {}
87
88 ~OutputBitStream() = default;
89 89
90 s32 GetBitsWritten() const { 90 constexpr std::size_t GetBitsWritten() const {
91 return m_BitsWritten; 91 return bits_written;
92 } 92 }
93 93
94 void WriteBitsR(u32 val, u32 nBits) { 94 constexpr void WriteBitsR(u32 val, u32 nBits) {
95 for (u32 i = 0; i < nBits; i++) { 95 for (u32 i = 0; i < nBits; i++) {
96 WriteBit((val >> (nBits - i - 1)) & 1); 96 WriteBit((val >> (nBits - i - 1)) & 1);
97 } 97 }
98 } 98 }
99 99
100 void WriteBits(u32 val, u32 nBits) { 100 constexpr void WriteBits(u32 val, u32 nBits) {
101 for (u32 i = 0; i < nBits; i++) { 101 for (u32 i = 0; i < nBits; i++) {
102 WriteBit((val >> i) & 1); 102 WriteBit((val >> i) & 1);
103 } 103 }
104 } 104 }
105 105
106private: 106private:
107 void WriteBit(s32 b) { 107 constexpr void WriteBit(bool b) {
108 108 if (bits_written >= num_bits) {
109 if (done)
110 return; 109 return;
110 }
111 111
112 const u32 mask = 1 << m_NextBit++; 112 const u32 mask = 1 << next_bit++;
113 113
114 // clear the bit 114 // clear the bit
115 *m_CurByte &= static_cast<u8>(~mask); 115 *cur_byte &= static_cast<u8>(~mask);
116 116
117 // Write the bit, if necessary 117 // Write the bit, if necessary
118 if (b) 118 if (b)
119 *m_CurByte |= static_cast<u8>(mask); 119 *cur_byte |= static_cast<u8>(mask);
120 120
121 // Next byte? 121 // Next byte?
122 if (m_NextBit >= 8) { 122 if (next_bit >= 8) {
123 m_CurByte += 1; 123 cur_byte += 1;
124 m_NextBit = 0; 124 next_bit = 0;
125 } 125 }
126
127 done = done || ++m_BitsWritten >= m_NumBits;
128 } 126 }
129 127
130 s32 m_BitsWritten = 0; 128 u8* cur_byte;
131 const s32 m_NumBits; 129 std::size_t num_bits;
132 u8* m_CurByte; 130 std::size_t bits_written = 0;
133 s32 m_NextBit = 0; 131 std::size_t next_bit = 0;
134
135 bool done = false;
136}; 132};
137 133
138template <typename IntType> 134template <typename IntType>
@@ -195,9 +191,13 @@ struct IntegerEncodedValue {
195 u32 trit_value; 191 u32 trit_value;
196 }; 192 };
197}; 193};
194using IntegerEncodedVector = boost::container::static_vector<
195 IntegerEncodedValue, 64,
196 boost::container::static_vector_options<
197 boost::container::inplace_alignment<alignof(IntegerEncodedValue)>,
198 boost::container::throw_on_overflow<false>>::type>;
198 199
199static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, 200static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) {
200 u32 nBitsPerValue) {
201 // Implement the algorithm in section C.2.12 201 // Implement the algorithm in section C.2.12
202 u32 m[5]; 202 u32 m[5];
203 u32 t[5]; 203 u32 t[5];
@@ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu
255 } 255 }
256} 256}
257 257
258static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, 258static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result,
259 u32 nBitsPerValue) { 259 u32 nBitsPerValue) {
260 // Implement the algorithm in section C.2.12 260 // Implement the algorithm in section C.2.12
261 u32 m[3]; 261 u32 m[3];
@@ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues();
343// Fills result with the values that are encoded in the given 343// Fills result with the values that are encoded in the given
344// bitstream. We must know beforehand what the maximum possible 344// bitstream. We must know beforehand what the maximum possible
345// value is, and how many values we're decoding. 345// value is, and how many values we're decoding.
346static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, 346static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange,
347 u32 maxRange, u32 nValues) { 347 u32 nValues) {
348 // Determine encoding parameters 348 // Determine encoding parameters
349 IntegerEncodedValue val = EncodingsValues[maxRange]; 349 IntegerEncodedValue val = EncodingsValues[maxRange];
350 350
@@ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) {
634// Replicates low numBits such that [(toBit - 1):(toBit - numBits)] 634// Replicates low numBits such that [(toBit - 1):(toBit - numBits)]
635// is the same as [(numBits - 1):0] and repeats all the way down. 635// is the same as [(numBits - 1):0] and repeats all the way down.
636template <typename IntType> 636template <typename IntType>
637static IntType Replicate(IntType val, u32 numBits, u32 toBit) { 637static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) {
638 if (numBits == 0) 638 if (numBits == 0) {
639 return 0; 639 return 0;
640 if (toBit == 0) 640 }
641 if (toBit == 0) {
641 return 0; 642 return 0;
642 IntType v = val & static_cast<IntType>((1 << numBits) - 1); 643 }
644 const IntType v = val & static_cast<IntType>((1 << numBits) - 1);
643 IntType res = v; 645 IntType res = v;
644 u32 reslen = numBits; 646 u32 reslen = numBits;
645 while (reslen < toBit) { 647 while (reslen < toBit) {
@@ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) {
656 return res; 658 return res;
657} 659}
658 660
661static constexpr std::size_t NumReplicateEntries(u32 num_bits) {
662 return std::size_t(1) << num_bits;
663}
664
665template <typename IntType, u32 num_bits, u32 to_bit>
666static constexpr auto MakeReplicateTable() {
667 std::array<IntType, NumReplicateEntries(num_bits)> table{};
668 for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) {
669 table[value] = Replicate(value, num_bits, to_bit);
670 }
671 return table;
672}
673
674static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>();
675static constexpr u32 ReplicateByteTo16(std::size_t value) {
676 return REPLICATE_BYTE_TO_16_TABLE[value];
677}
678
679static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>();
680static constexpr u32 ReplicateBitTo7(std::size_t value) {
681 return REPLICATE_BIT_TO_7_TABLE[value];
682}
683
684static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>();
685static constexpr u32 ReplicateBitTo9(std::size_t value) {
686 return REPLICATE_BIT_TO_9_TABLE[value];
687}
688
689static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>();
690static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>();
691static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>();
692static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>();
693static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>();
694static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>();
695static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>();
696static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>();
697/// Use a precompiled table with the most common usages; if the value is not in the expected range,
698/// fall back to the runtime implementation.
699static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) {
700 switch (num_bits) {
701 case 1:
702 return REPLICATE_1_BIT_TO_8_TABLE[value];
703 case 2:
704 return REPLICATE_2_BIT_TO_8_TABLE[value];
705 case 3:
706 return REPLICATE_3_BIT_TO_8_TABLE[value];
707 case 4:
708 return REPLICATE_4_BIT_TO_8_TABLE[value];
709 case 5:
710 return REPLICATE_5_BIT_TO_8_TABLE[value];
711 case 6:
712 return REPLICATE_6_BIT_TO_8_TABLE[value];
713 case 7:
714 return REPLICATE_7_BIT_TO_8_TABLE[value];
715 case 8:
716 return REPLICATE_8_BIT_TO_8_TABLE[value];
717 default:
718 return Replicate(value, num_bits, 8);
719 }
720}
721
722static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>();
723static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>();
724static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>();
725static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>();
726static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>();
727static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) {
728 switch (num_bits) {
729 case 1:
730 return REPLICATE_1_BIT_TO_6_TABLE[value];
731 case 2:
732 return REPLICATE_2_BIT_TO_6_TABLE[value];
733 case 3:
734 return REPLICATE_3_BIT_TO_6_TABLE[value];
735 case 4:
736 return REPLICATE_4_BIT_TO_6_TABLE[value];
737 case 5:
738 return REPLICATE_5_BIT_TO_6_TABLE[value];
739 default:
740 return Replicate(value, num_bits, 6);
741 }
742}
743
659class Pixel { 744class Pixel {
660protected: 745protected:
661 using ChannelType = s16; 746 using ChannelType = s16;
@@ -674,10 +759,10 @@ public:
674 // significant bits when going from larger to smaller bit depth 759 // significant bits when going from larger to smaller bit depth
675 // or by repeating the most significant bits when going from 760 // or by repeating the most significant bits when going from
676 // smaller to larger bit depths. 761 // smaller to larger bit depths.
677 void ChangeBitDepth(const u8 (&depth)[4]) { 762 void ChangeBitDepth() {
678 for (u32 i = 0; i < 4; i++) { 763 for (u32 i = 0; i < 4; i++) {
679 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); 764 Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]);
680 m_BitDepth[i] = depth[i]; 765 m_BitDepth[i] = 8;
681 } 766 }
682 } 767 }
683 768
@@ -689,28 +774,23 @@ public:
689 774
690 // Changes the bit depth of a single component. See the comment 775 // Changes the bit depth of a single component. See the comment
691 // above for how we do this. 776 // above for how we do this.
692 static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { 777 static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) {
693 assert(newDepth <= 8);
694 assert(oldDepth <= 8); 778 assert(oldDepth <= 8);
695 779
696 if (oldDepth == newDepth) { 780 if (oldDepth == 8) {
697 // Do nothing 781 // Do nothing
698 return val; 782 return val;
699 } else if (oldDepth == 0 && newDepth != 0) { 783 } else if (oldDepth == 0) {
700 return static_cast<ChannelType>((1 << newDepth) - 1); 784 return static_cast<ChannelType>((1 << 8) - 1);
701 } else if (newDepth > oldDepth) { 785 } else if (8 > oldDepth) {
702 return Replicate(val, oldDepth, newDepth); 786 return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth));
703 } else { 787 } else {
704 // oldDepth > newDepth 788 // oldDepth > newDepth
705 if (newDepth == 0) { 789 const u8 bitsWasted = static_cast<u8>(oldDepth - 8);
706 return 0xFF; 790 u16 v = static_cast<u16>(val);
707 } else { 791 v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
708 u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); 792 v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1));
709 u16 v = static_cast<u16>(val); 793 return static_cast<u8>(v);
710 v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
711 v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1));
712 return static_cast<u8>(v);
713 }
714 } 794 }
715 795
716 assert(false && "We shouldn't get here."); 796 assert(false && "We shouldn't get here.");
@@ -760,8 +840,7 @@ public:
760 // up in the most-significant byte. 840 // up in the most-significant byte.
761 u32 Pack() const { 841 u32 Pack() const {
762 Pixel eightBit(*this); 842 Pixel eightBit(*this);
763 const u8 eightBitDepth[4] = {8, 8, 8, 8}; 843 eightBit.ChangeBitDepth();
764 eightBit.ChangeBitDepth(eightBitDepth);
765 844
766 u32 r = 0; 845 u32 r = 0;
767 r |= eightBit.A(); 846 r |= eightBit.A();
@@ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
816 } 895 }
817 896
818 // We now have enough to decode our integer sequence. 897 // We now have enough to decode our integer sequence.
819 std::vector<IntegerEncodedValue> decodedColorValues; 898 IntegerEncodedVector decodedColorValues;
820 decodedColorValues.reserve(32);
821 899
822 InputBitStream colorStream(data); 900 InputBitStream colorStream(data);
823 DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); 901 DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
@@ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP
839 917
840 u32 A = 0, B = 0, C = 0, D = 0; 918 u32 A = 0, B = 0, C = 0, D = 0;
841 // A is just the lsb replicated 9 times. 919 // A is just the lsb replicated 9 times.
842 A = Replicate(bitval & 1, 1, 9); 920 A = ReplicateBitTo9(bitval & 1);
843 921
844 switch (val.encoding) { 922 switch (val.encoding) {
845 // Replicate bits 923 // Replicate bits
846 case IntegerEncoding::JustBits: 924 case IntegerEncoding::JustBits:
847 out[outIdx++] = Replicate(bitval, bitlen, 8); 925 out[outIdx++] = FastReplicateTo8(bitval, bitlen);
848 break; 926 break;
849 927
850 // Use algorithm in C.2.13 928 // Use algorithm in C.2.13
@@ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
962 u32 bitval = val.bit_value; 1040 u32 bitval = val.bit_value;
963 u32 bitlen = val.num_bits; 1041 u32 bitlen = val.num_bits;
964 1042
965 u32 A = Replicate(bitval & 1, 1, 7); 1043 u32 A = ReplicateBitTo7(bitval & 1);
966 u32 B = 0, C = 0, D = 0; 1044 u32 B = 0, C = 0, D = 0;
967 1045
968 u32 result = 0; 1046 u32 result = 0;
969 switch (val.encoding) { 1047 switch (val.encoding) {
970 case IntegerEncoding::JustBits: 1048 case IntegerEncoding::JustBits:
971 result = Replicate(bitval, bitlen, 6); 1049 result = FastReplicateTo6(bitval, bitlen);
972 break; 1050 break;
973 1051
974 case IntegerEncoding::Trit: { 1052 case IntegerEncoding::Trit: {
@@ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) {
1047 return result; 1125 return result;
1048} 1126}
1049 1127
1050static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, 1128static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights,
1051 const TexelWeightParams& params, const u32 blockWidth, 1129 const TexelWeightParams& params, const u32 blockWidth,
1052 const u32 blockHeight) { 1130 const u32 blockHeight) {
1053 u32 weightIdx = 0; 1131 u32 weightIdx = 0;
@@ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1545 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); 1623 static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
1546 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); 1624 memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
1547 1625
1548 std::vector<IntegerEncodedValue> texelWeightValues; 1626 IntegerEncodedVector texelWeightValues;
1549 texelWeightValues.reserve(64);
1550 1627
1551 InputBitStream weightStream(texelWeightData); 1628 InputBitStream weightStream(texelWeightData);
1552 1629
@@ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32
1568 Pixel p; 1645 Pixel p;
1569 for (u32 c = 0; c < 4; c++) { 1646 for (u32 c = 0; c < 4; c++) {
1570 u32 C0 = endpos32s[partition][0].Component(c); 1647 u32 C0 = endpos32s[partition][0].Component(c);
1571 C0 = Replicate(C0, 8, 16); 1648 C0 = ReplicateByteTo16(C0);
1572 u32 C1 = endpos32s[partition][1].Component(c); 1649 u32 C1 = endpos32s[partition][1].Component(c);
1573 C1 = Replicate(C1, 8, 16); 1650 C1 = ReplicateByteTo16(C1);
1574 1651
1575 u32 plane = 0; 1652 u32 plane = 0;
1576 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { 1653 if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
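
The REPLICATE_* tables added above memoize bit replication, the operation that widens an n-bit color value by repeating its most significant bits. A compact sketch of the underlying math (ReplicateTo8 is an illustrative reimplementation, not the commit's Replicate):

#include <cstdint>

// Widen an n-bit value to 8 bits by repeating its high bits downward; the
// tables simply precompute this for every possible input value.
constexpr std::uint32_t ReplicateTo8(std::uint32_t value, std::uint32_t num_bits) {
    if (num_bits == 0) {
        return 0; // mirror the zero-width early-out of the runtime fallback
    }
    std::uint32_t result = value;
    std::uint32_t length = num_bits;
    while (length < 8) {
        const std::uint32_t shift = num_bits < 8 - length ? num_bits : 8 - length;
        result = (result << shift) | (value >> (num_bits - shift));
        length += shift;
    }
    return result;
}

static_assert(ReplicateTo8(0b10110, 5) == 0b10110101); // 22/31 and 181/255 agree closely
static_assert(ReplicateTo8(1, 1) == 0xFF);             // a set bit saturates to full intensity
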
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp
new file mode 100644
index 000000000..d1939d744
--- /dev/null
+++ b/src/video_core/textures/texture.cpp
@@ -0,0 +1,80 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7
8#include "core/settings.h"
9#include "video_core/textures/texture.h"
10
11namespace Tegra::Texture {
12
13namespace {
14
15constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
16 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f,
17 0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f,
18 0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f,
19 0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f,
20 0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f,
21 0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f,
22 0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f,
23 0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f,
24 0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f,
25 0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f,
26 0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f,
27 0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f,
28 0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f,
29 0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f,
30 0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f,
31 0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f,
32 0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f,
33 0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f,
34 0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f,
35 0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f,
36 0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f,
37 0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f,
38 0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f,
39 0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f,
40 0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f,
41 0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f,
42 0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f,
43 0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f,
44 0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f,
45 0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f,
46 0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f,
47 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
48};
49
50unsigned SettingsMinimumAnisotropy() noexcept {
51 switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) {
52 default:
53 case Anisotropy::Default:
54 return 1U;
55 case Anisotropy::Filter2x:
56 return 2U;
57 case Anisotropy::Filter4x:
58 return 4U;
59 case Anisotropy::Filter8x:
60 return 8U;
61 case Anisotropy::Filter16x:
62 return 16U;
63 }
64}
65
66} // Anonymous namespace
67
68std::array<float, 4> TSCEntry::GetBorderColor() const noexcept {
69 if (!srgb_conversion) {
70 return border_color;
71 }
72 return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g],
73 SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]};
74}
75
76float TSCEntry::GetMaxAnisotropy() const noexcept {
77 return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
78}
79
80} // namespace Tegra::Texture
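GetBorderColor now resolves the integer sRGB border components through the 256-entry table above instead of a plain division by 255, giving a proper transfer-function conversion. A sketch of how such a table could be generated at runtime with the standard sRGB-to-linear curve; this generator is an assumption for illustration, since the committed table was evidently baked offline and saturates its last entries to 1.0:

    #include <array>
    #include <cmath>
    #include <cstddef>

    // Hypothetical generator for an sRGB-to-linear lookup table using the
    // IEC 61966-2-1 transfer function. Values will differ slightly from the
    // hand-baked SRGB_CONVERSION_LUT in texture.cpp.
    std::array<float, 256> MakeSrgbToLinearLut() {
        std::array<float, 256> lut{};
        for (std::size_t i = 0; i < lut.size(); ++i) {
            const float srgb = static_cast<float>(i) / 255.0f;
            lut[i] = srgb <= 0.04045f ? srgb / 12.92f
                                      : std::pow((srgb + 0.055f) / 1.055f, 2.4f);
        }
        return lut;
    }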
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index 7edc4abe1..eba05aced 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -8,7 +8,6 @@
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/bit_field.h" 9#include "common/bit_field.h"
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "core/settings.h"
12 11
13namespace Tegra::Texture { 12namespace Tegra::Texture {
14 13
@@ -132,6 +131,20 @@ enum class SwizzleSource : u32 {
132 OneFloat = 7, 131 OneFloat = 7,
133}; 132};
134 133
134enum class MsaaMode : u32 {
135 Msaa1x1 = 0,
136 Msaa2x1 = 1,
137 Msaa2x2 = 2,
138 Msaa4x2 = 3,
139 Msaa4x2_D3D = 4,
140 Msaa2x1_D3D = 5,
141 Msaa4x4 = 6,
142 Msaa2x2_VC4 = 8,
143 Msaa2x2_VC12 = 9,
144 Msaa4x2_VC8 = 10,
145 Msaa4x2_VC24 = 11,
146};
147
135union TextureHandle { 148union TextureHandle {
136 TextureHandle(u32 raw) : raw{raw} {} 149 TextureHandle(u32 raw) : raw{raw} {}
137 150
@@ -198,6 +211,7 @@ struct TICEntry {
198 union { 211 union {
199 BitField<0, 4, u32> res_min_mip_level; 212 BitField<0, 4, u32> res_min_mip_level;
200 BitField<4, 4, u32> res_max_mip_level; 213 BitField<4, 4, u32> res_max_mip_level;
214 BitField<8, 4, MsaaMode> msaa_mode;
201 BitField<12, 12, u32> min_lod_clamp; 215 BitField<12, 12, u32> min_lod_clamp;
202 }; 216 };
203 217
@@ -336,24 +350,9 @@ struct TSCEntry {
336 std::array<u8, 0x20> raw; 350 std::array<u8, 0x20> raw;
337 }; 351 };
338 352
339 float GetMaxAnisotropy() const { 353 std::array<float, 4> GetBorderColor() const noexcept;
340 const u32 min_value = [] { 354
341 switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { 355 float GetMaxAnisotropy() const noexcept;
342 default:
343 case Anisotropy::Default:
344 return 1U;
345 case Anisotropy::Filter2x:
346 return 2U;
347 case Anisotropy::Filter4x:
348 return 4U;
349 case Anisotropy::Filter8x:
350 return 8U;
351 case Anisotropy::Filter16x:
352 return 16U;
353 }
354 }();
355 return static_cast<float>(std::max(1U << max_anisotropy, min_value));
356 }
357 356
358 float GetMinLod() const { 357 float GetMinLod() const {
359 return static_cast<float>(min_lod_clamp) / 256.0f; 358 return static_cast<float>(min_lod_clamp) / 256.0f;
@@ -368,15 +367,6 @@ struct TSCEntry {
368 constexpr u32 mask = 1U << (13 - 1); 367 constexpr u32 mask = 1U << (13 - 1);
369 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; 368 return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
370 } 369 }
371
372 std::array<float, 4> GetBorderColor() const {
373 if (srgb_conversion) {
374 return {static_cast<float>(srgb_border_color_r) / 255.0f,
375 static_cast<float>(srgb_border_color_g) / 255.0f,
376 static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
377 }
378 return border_color;
379 }
380}; 370};
381static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); 371static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
382 372
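GetMinLod and GetLodBias both interpret their fields as 8.8 fixed point, and the bias additionally sign-extends a 13-bit two's-complement value with the (x ^ mask) - mask idiom seen above. A worked sketch of that decode:

    #include <cstdint>

    // Sign-extend the 13-bit two's-complement mip_lod_bias field with the
    // xor/subtract idiom, then scale the 8.8 fixed-point result to float.
    constexpr float DecodeLodBias(std::uint32_t mip_lod_bias) {
        constexpr std::uint32_t mask = 1U << (13 - 1); // the field's sign bit
        const auto widened = static_cast<std::int32_t>((mip_lod_bias ^ mask) - mask);
        return static_cast<float>(widened) / 256.0f;
    }

    // 0x1F00 is -256 in 13-bit two's complement, i.e. an LOD bias of -1.0.
    static_assert(DecodeLodBias(0x1F00) == -1.0f);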
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index d34b47b3f..8b9404718 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -150,6 +150,10 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core)
150target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) 150target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets)
151target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) 151target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
152 152
153if (ENABLE_VULKAN AND NOT WIN32)
154 target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS})
155endif()
156
153target_compile_definitions(yuzu PRIVATE 157target_compile_definitions(yuzu PRIVATE
154 # Use QStringBuilder for string concatenation to reduce 158 # Use QStringBuilder for string concatenation to reduce
155 # the overall number of temporary strings created. 159 # the overall number of temporary strings created.
diff --git a/src/yuzu/about_dialog.cpp b/src/yuzu/about_dialog.cpp
index d39b3f07a..695b2ef5f 100644
--- a/src/yuzu/about_dialog.cpp
+++ b/src/yuzu/about_dialog.cpp
@@ -3,15 +3,22 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <QIcon> 5#include <QIcon>
6#include <fmt/format.h>
6#include "common/scm_rev.h" 7#include "common/scm_rev.h"
7#include "ui_aboutdialog.h" 8#include "ui_aboutdialog.h"
8#include "yuzu/about_dialog.h" 9#include "yuzu/about_dialog.h"
9 10
10AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) { 11AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) {
12 const auto build_id = std::string(Common::g_build_id);
13 const auto fmt = std::string(Common::g_title_bar_format_idle);
14 const auto yuzu_build_version =
15 fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{},
16 std::string{}, std::string{}, std::string{}, build_id);
17
11 ui->setupUi(this); 18 ui->setupUi(this);
12 ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200)); 19 ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200));
13 ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg( 20 ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg(
14 QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch), 21 QString::fromStdString(yuzu_build_version), QString::fromUtf8(Common::g_scm_branch),
15 QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10))); 22 QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10)));
16} 23}
17 24
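This mirrors the change in main.cpp further down: the version shown to the user now comes from the configurable g_title_bar_format_idle, with a generic fallback for local builds. A condensed sketch of the fallback logic, assuming (as the call above does) that the idle format takes six positional fields of which only the trailing build id matters; note that newer fmt releases would require wrapping the runtime format string in fmt::runtime():

    #include <string>
    #include <fmt/format.h>

    std::string MakeBuildVersion(const std::string& title_format,
                                 const std::string& build_id) {
        if (title_format.empty()) {
            // Local development builds carry no configured format.
            return "yuzu Development Build";
        }
        // Feed empty strings to the five placeholders we do not care about.
        return fmt::format(title_format, std::string{}, std::string{},
                           std::string{}, std::string{}, std::string{}, build_id);
    }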
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index eaded2640..1cac2f942 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -14,8 +14,9 @@
14#include <QScreen> 14#include <QScreen>
15#include <QStringList> 15#include <QStringList>
16#include <QWindow> 16#include <QWindow>
17#ifdef HAS_VULKAN 17
18#include <QVulkanWindow> 18#if !defined(WIN32) && HAS_VULKAN
19#include <qpa/qplatformnativeinterface.h>
19#endif 20#endif
20 21
21#include <fmt/format.h> 22#include <fmt/format.h>
@@ -224,7 +225,6 @@ public:
224 } 225 }
225 226
226 context->MakeCurrent(); 227 context->MakeCurrent();
227 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
228 if (Core::System::GetInstance().Renderer().TryPresent(100)) { 228 if (Core::System::GetInstance().Renderer().TryPresent(100)) {
229 context->SwapBuffers(); 229 context->SwapBuffers();
230 glFinish(); 230 glFinish();
@@ -238,16 +238,50 @@ private:
238#ifdef HAS_VULKAN 238#ifdef HAS_VULKAN
239class VulkanRenderWidget : public RenderWidget { 239class VulkanRenderWidget : public RenderWidget {
240public: 240public:
241 explicit VulkanRenderWidget(GRenderWindow* parent, QVulkanInstance* instance) 241 explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) {
242 : RenderWidget(parent) {
243 windowHandle()->setSurfaceType(QWindow::VulkanSurface); 242 windowHandle()->setSurfaceType(QWindow::VulkanSurface);
244 windowHandle()->setVulkanInstance(instance);
245 } 243 }
246}; 244};
247#endif 245#endif
248 246
249GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread) 247static Core::Frontend::WindowSystemType GetWindowSystemType() {
250 : QWidget(parent_), emu_thread(emu_thread) { 248 // Determine WSI type based on Qt platform.
249 QString platform_name = QGuiApplication::platformName();
250 if (platform_name == QStringLiteral("windows"))
251 return Core::Frontend::WindowSystemType::Windows;
252 else if (platform_name == QStringLiteral("xcb"))
253 return Core::Frontend::WindowSystemType::X11;
254 else if (platform_name == QStringLiteral("wayland"))
255 return Core::Frontend::WindowSystemType::Wayland;
256
257 LOG_CRITICAL(Frontend, "Unknown Qt platform!");
258 return Core::Frontend::WindowSystemType::Windows;
259}
260
261static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) {
262 Core::Frontend::EmuWindow::WindowSystemInfo wsi;
263 wsi.type = GetWindowSystemType();
264
265#ifdef HAS_VULKAN
266 // Our Win32 Qt external doesn't have the private API.
267#if defined(WIN32) || defined(__APPLE__)
268 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
269#else
270 QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface();
271 wsi.display_connection = pni->nativeResourceForWindow("display", window);
272 if (wsi.type == Core::Frontend::WindowSystemType::Wayland)
273 wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr;
274 else
275 wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr;
276#endif
277 wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f;
278#endif
279
280 return wsi;
281}
282
283GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread_)
284 : QWidget(parent_), emu_thread(emu_thread_) {
251 setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") 285 setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
252 .arg(QString::fromUtf8(Common::g_build_name), 286 .arg(QString::fromUtf8(Common::g_build_name),
253 QString::fromUtf8(Common::g_scm_branch), 287 QString::fromUtf8(Common::g_scm_branch),
@@ -460,6 +494,9 @@ bool GRenderWindow::InitRenderTarget() {
460 break; 494 break;
461 } 495 }
462 496
497 // Update the Window System information with the new render target
498 window_info = GetWindowSystemInfo(child_widget->windowHandle());
499
463 child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); 500 child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height);
464 layout()->addWidget(child_widget); 501 layout()->addWidget(child_widget);
465 // Reset minimum required size to avoid resizing issues on the main window after restarting. 502 // Reset minimum required size to avoid resizing issues on the main window after restarting.
@@ -531,30 +568,7 @@ bool GRenderWindow::InitializeOpenGL() {
531 568
532bool GRenderWindow::InitializeVulkan() { 569bool GRenderWindow::InitializeVulkan() {
533#ifdef HAS_VULKAN 570#ifdef HAS_VULKAN
534 vk_instance = std::make_unique<QVulkanInstance>(); 571 auto child = new VulkanRenderWidget(this);
535 vk_instance->setApiVersion(QVersionNumber(1, 1, 0));
536 vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect);
537 if (Settings::values.renderer_debug) {
538 const auto supported_layers{vk_instance->supportedLayers()};
539 const bool found =
540 std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) {
541 constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation";
542 return layer.name == searched_layer;
543 });
544 if (found) {
545 vk_instance->setLayers(QByteArrayList() << "VK_LAYER_LUNARG_standard_validation");
546 vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
547 }
548 }
549 if (!vk_instance->create()) {
550 QMessageBox::critical(
551 this, tr("Error while initializing Vulkan 1.1!"),
552 tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the "
553 "latest graphics drivers."));
554 return false;
555 }
556
557 auto child = new VulkanRenderWidget(this, vk_instance.get());
558 child_widget = child; 572 child_widget = child;
559 child_widget->windowHandle()->create(); 573 child_widget->windowHandle()->create();
560 main_context = std::make_unique<DummyContext>(); 574 main_context = std::make_unique<DummyContext>();
@@ -567,21 +581,6 @@ bool GRenderWindow::InitializeVulkan() {
567#endif 581#endif
568} 582}
569 583
570void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
571 void* surface) const {
572#ifdef HAS_VULKAN
573 const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr");
574 const VkInstance instance_copy = vk_instance->vkInstance();
575 const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_widget->windowHandle());
576
577 std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
578 std::memcpy(instance, &instance_copy, sizeof(instance_copy));
579 std::memcpy(surface, &surface_copy, sizeof(surface_copy));
580#else
581 UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan");
582#endif
583}
584
585bool GRenderWindow::LoadOpenGL() { 584bool GRenderWindow::LoadOpenGL() {
586 auto context = CreateSharedContext(); 585 auto context = CreateSharedContext();
587 auto scope = context->Acquire(); 586 auto scope = context->Acquire();
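GetWindowSystemInfo only collects native handles; turning them into a presentable surface is left to the Vulkan backend. A hedged sketch of that consuming side, showing only the Win32 branch (X11 and Wayland would instead call vkCreateXcbSurfaceKHR or vkCreateWaylandSurfaceKHR, passing display_connection as well); the function name here is illustrative, not the renderer's actual API:

    #ifdef _WIN32
    #include <windows.h>
    #define VK_USE_PLATFORM_WIN32_KHR
    #endif
    #include <vulkan/vulkan.h>

    // Turn the frontend's render_surface handle (an HWND on Windows) into a
    // VkSurfaceKHR. Returns VK_NULL_HANDLE on failure or unsupported platforms.
    VkSurfaceKHR CreateWin32Surface(VkInstance instance, void* render_surface) {
        VkSurfaceKHR surface = VK_NULL_HANDLE;
    #ifdef _WIN32
        VkWin32SurfaceCreateInfoKHR ci{};
        ci.sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR;
        ci.hinstance = GetModuleHandleW(nullptr);
        ci.hwnd = static_cast<HWND>(render_surface);
        if (vkCreateWin32SurfaceKHR(instance, &ci, nullptr, &surface) != VK_SUCCESS) {
            surface = VK_NULL_HANDLE;
        }
    #endif
        return surface;
    }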
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index d69078df1..3626604ca 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -22,9 +22,6 @@ class GMainWindow;
22class QKeyEvent; 22class QKeyEvent;
23class QTouchEvent; 23class QTouchEvent;
24class QStringList; 24class QStringList;
25#ifdef HAS_VULKAN
26class QVulkanInstance;
27#endif
28 25
29namespace VideoCore { 26namespace VideoCore {
30enum class LoadCallbackStage; 27enum class LoadCallbackStage;
@@ -122,8 +119,6 @@ public:
122 // EmuWindow implementation. 119 // EmuWindow implementation.
123 void PollEvents() override; 120 void PollEvents() override;
124 bool IsShown() const override; 121 bool IsShown() const override;
125 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
126 void* surface) const override;
127 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 122 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
128 123
129 void BackupGeometry(); 124 void BackupGeometry();
@@ -186,10 +181,6 @@ private:
186 // should instead be shared from 181 // should instead be shared from
187 std::shared_ptr<Core::Frontend::GraphicsContext> main_context; 182 std::shared_ptr<Core::Frontend::GraphicsContext> main_context;
188 183
189#ifdef HAS_VULKAN
190 std::unique_ptr<QVulkanInstance> vk_instance;
191#endif
192
193 /// Temporary storage of the screenshot taken 184 /// Temporary storage of the screenshot taken
194 QImage screenshot_image; 185 QImage screenshot_image;
195 186
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp
index a821c7b3c..ea667caef 100644
--- a/src/yuzu/configuration/configure_graphics.cpp
+++ b/src/yuzu/configuration/configure_graphics.cpp
@@ -15,6 +15,10 @@
15#include "ui_configure_graphics.h" 15#include "ui_configure_graphics.h"
16#include "yuzu/configuration/configure_graphics.h" 16#include "yuzu/configuration/configure_graphics.h"
17 17
18#ifdef HAS_VULKAN
19#include "video_core/renderer_vulkan/renderer_vulkan.h"
20#endif
21
18namespace { 22namespace {
19enum class Resolution : int { 23enum class Resolution : int {
20 Auto, 24 Auto,
@@ -165,41 +169,9 @@ void ConfigureGraphics::UpdateDeviceComboBox() {
165 169
166void ConfigureGraphics::RetrieveVulkanDevices() { 170void ConfigureGraphics::RetrieveVulkanDevices() {
167#ifdef HAS_VULKAN 171#ifdef HAS_VULKAN
168 QVulkanInstance instance; 172 vulkan_devices.clear();
169 instance.setApiVersion(QVersionNumber(1, 1, 0)); 173 for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
170 if (!instance.create()) { 174 vulkan_devices.push_back(QString::fromStdString(name));
171 LOG_INFO(Frontend, "Vulkan 1.1 not available");
172 return;
173 }
174 const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>(
175 instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))};
176 if (vkEnumeratePhysicalDevices == nullptr) {
177 LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices");
178 return;
179 }
180 u32 physical_device_count;
181 if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) !=
182 VK_SUCCESS) {
183 LOG_INFO(Frontend, "Failed to get physical devices count");
184 return;
185 }
186 std::vector<VkPhysicalDevice> physical_devices(physical_device_count);
187 if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count,
188 physical_devices.data()) != VK_SUCCESS) {
189 LOG_INFO(Frontend, "Failed to get physical devices");
190 return;
191 }
192
193 const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>(
194 instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))};
195 if (vkGetPhysicalDeviceProperties == nullptr) {
196 LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties");
197 return;
198 }
199 for (const auto physical_device : physical_devices) {
200 VkPhysicalDeviceProperties properties;
201 vkGetPhysicalDeviceProperties(physical_device, &properties);
202 vulkan_devices.push_back(QString::fromUtf8(properties.deviceName));
203 } 175 }
204#endif 176#endif
205} 177}
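With device enumeration moved behind RendererVulkan::EnumerateDevices(), the dialog no longer needs QVulkanInstance or raw Vulkan entry points. A usage sketch of the new API, assuming a plain QComboBox as the target (UpdateDeviceComboBox is what actually consumes vulkan_devices here):

    #include <QComboBox>
    #include <QString>
    #include "video_core/renderer_vulkan/renderer_vulkan.h"

    // Fill a combo box with the physical-device names reported by the
    // backend; instance creation and teardown happen inside EnumerateDevices.
    void PopulateVulkanDevices(QComboBox* combo) {
        combo->clear();
        for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) {
            combo->addItem(QString::fromStdString(name));
        }
    }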
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index 96dec50e2..15ac30f12 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -541,18 +541,19 @@ void ConfigureInputPlayer::HandleClick(
541 button->setText(tr("[press key]")); 541 button->setText(tr("[press key]"));
542 button->setFocus(); 542 button->setFocus();
543 543
544 const auto iter = std::find(button_map.begin(), button_map.end(), button); 544 // Keyboard keys can only be used as button devices
545 ASSERT(iter != button_map.end()); 545 want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
546 const auto index = std::distance(button_map.begin(), iter); 546 if (want_keyboard_keys) {
547 ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); 547 const auto iter = std::find(button_map.begin(), button_map.end(), button);
548 ASSERT(iter != button_map.end());
549 const auto index = std::distance(button_map.begin(), iter);
550 ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
551 }
548 552
549 input_setter = new_input_setter; 553 input_setter = new_input_setter;
550 554
551 device_pollers = InputCommon::Polling::GetPollers(type); 555 device_pollers = InputCommon::Polling::GetPollers(type);
552 556
553 // Keyboard keys can only be used as button devices
554 want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
555
556 for (auto& poller : device_pollers) { 557 for (auto& poller : device_pollers) {
557 poller->Start(); 558 poller->Start();
558 } 559 }
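The reordering above makes the button_map lookup, and the index asserts that follow it, run only for button-device polling, where the clicked widget is guaranteed to be one of the mapped buttons; analog polling would otherwise hit those asserts with widgets outside the map. A condensed sketch of the guarded lookup, with hypothetical types standing in for the Qt widgets:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Derive a button index from a clicked widget, but only when the widget
    // is known to belong to the map (i.e. we are polling button devices).
    std::ptrdiff_t ButtonIndex(const std::vector<const void*>& button_map,
                               const void* button) {
        const auto iter = std::find(button_map.begin(), button_map.end(), button);
        assert(iter != button_map.end());
        return std::distance(button_map.begin(), iter);
    }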
diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui
index c3a1b68f0..4b37746a1 100644
--- a/src/yuzu/configuration/configure_input_player.ui
+++ b/src/yuzu/configuration/configure_input_player.ui
@@ -927,7 +927,7 @@
927 </item> 927 </item>
928 </layout> 928 </layout>
929 </item> 929 </item>
930 <item row="2" column="0"> 930 <item row="0" column="2">
931 <layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout"> 931 <layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout">
932 <item> 932 <item>
933 <layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout"> 933 <layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout">
@@ -949,7 +949,7 @@
949 </item> 949 </item>
950 </layout> 950 </layout>
951 </item> 951 </item>
952 <item row="2" column="1"> 952 <item row="1" column="2">
953 <layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout"> 953 <layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout">
954 <item> 954 <item>
955 <layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout"> 955 <layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout">
diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp
index ab3a11d30..0e0e8f113 100644
--- a/src/yuzu/configuration/configure_input_simple.cpp
+++ b/src/yuzu/configuration/configure_input_simple.cpp
@@ -35,6 +35,7 @@ void CallConfigureDialog(ConfigureInputSimple* caller, Args&&... args) {
35// - Open any dialogs 35// - Open any dialogs
36// - Block in any way 36// - Block in any way
37 37
38constexpr std::size_t PLAYER_0_INDEX = 0;
38constexpr std::size_t HANDHELD_INDEX = 8; 39constexpr std::size_t HANDHELD_INDEX = 8;
39 40
40void HandheldOnProfileSelect() { 41void HandheldOnProfileSelect() {
@@ -53,8 +54,8 @@ void HandheldOnProfileSelect() {
53} 54}
54 55
55void DualJoyconsDockedOnProfileSelect() { 56void DualJoyconsDockedOnProfileSelect() {
56 Settings::values.players[0].connected = true; 57 Settings::values.players[PLAYER_0_INDEX].connected = true;
57 Settings::values.players[0].type = Settings::ControllerType::DualJoycon; 58 Settings::values.players[PLAYER_0_INDEX].type = Settings::ControllerType::DualJoycon;
58 59
59 for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) { 60 for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) {
60 Settings::values.players[player].connected = false; 61 Settings::values.players[player].connected = false;
@@ -64,7 +65,7 @@ void DualJoyconsDockedOnProfileSelect() {
64 Settings::values.keyboard_enabled = false; 65 Settings::values.keyboard_enabled = false;
65 Settings::values.mouse_enabled = false; 66 Settings::values.mouse_enabled = false;
66 Settings::values.debug_pad_enabled = false; 67 Settings::values.debug_pad_enabled = false;
67 Settings::values.touchscreen.enabled = false; 68 Settings::values.touchscreen.enabled = true;
68} 69}
69 70
70// Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure 71// Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure
@@ -78,7 +79,7 @@ constexpr std::array<InputProfile, 3> INPUT_PROFILES{{
78 }}, 79 }},
79 {QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect, 80 {QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect,
80 [](ConfigureInputSimple* caller) { 81 [](ConfigureInputSimple* caller) {
81 CallConfigureDialog<ConfigureInputPlayer>(caller, 1, false); 82 CallConfigureDialog<ConfigureInputPlayer>(caller, PLAYER_0_INDEX, false);
82 }}, 83 }},
83 {QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>}, 84 {QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>},
84}}; 85}};
diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp
index 0a4abe34f..e0647ea5b 100644
--- a/src/yuzu/configuration/configure_mouse_advanced.cpp
+++ b/src/yuzu/configuration/configure_mouse_advanced.cpp
@@ -184,18 +184,19 @@ void ConfigureMouseAdvanced::HandleClick(
184 button->setText(tr("[press key]")); 184 button->setText(tr("[press key]"));
185 button->setFocus(); 185 button->setFocus();
186 186
187 const auto iter = std::find(button_map.begin(), button_map.end(), button); 187 // Keyboard keys can only be used as button devices
188 ASSERT(iter != button_map.end()); 188 want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
189 const auto index = std::distance(button_map.begin(), iter); 189 if (want_keyboard_keys) {
190 ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); 190 const auto iter = std::find(button_map.begin(), button_map.end(), button);
191 ASSERT(iter != button_map.end());
192 const auto index = std::distance(button_map.begin(), iter);
193 ASSERT(index < Settings::NativeButton::NumButtons && index >= 0);
194 }
191 195
192 input_setter = new_input_setter; 196 input_setter = new_input_setter;
193 197
194 device_pollers = InputCommon::Polling::GetPollers(type); 198 device_pollers = InputCommon::Polling::GetPollers(type);
195 199
196 // Keyboard keys can only be used as button devices
197 want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button;
198
199 for (auto& poller : device_pollers) { 200 for (auto& poller : device_pollers) {
200 poller->Start(); 201 poller->Start();
201 } 202 }
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index a2b88c787..dccbabcbf 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -315,7 +315,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide
315 item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); 315 item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type"));
316 item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); 316 item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size"));
317 } 317 }
318 item_model->setSortRole(GameListItemPath::TitleRole); 318 item_model->setSortRole(GameListItemPath::SortRole);
319 319
320 connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons); 320 connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons);
321 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); 321 connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry);
@@ -441,6 +441,8 @@ void GameList::DonePopulating(QStringList watch_list) {
441 if (children_total > 0) { 441 if (children_total > 0) {
442 search_field->setFocus(); 442 search_field->setFocus();
443 } 443 }
444 item_model->sort(tree_view->header()->sortIndicatorSection(),
445 tree_view->header()->sortIndicatorOrder());
444} 446}
445 447
446void GameList::PopupContextMenu(const QPoint& menu_location) { 448void GameList::PopupContextMenu(const QPoint& menu_location) {
@@ -666,8 +668,6 @@ void GameList::LoadInterfaceLayout() {
666 // so make it as large as possible as default. 668 // so make it as large as possible as default.
667 header->resizeSection(COLUMN_NAME, header->width()); 669 header->resizeSection(COLUMN_NAME, header->width());
668 } 670 }
669
670 item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
671} 671}
672 672
673const QStringList GameList::supported_file_extensions = { 673const QStringList GameList::supported_file_extensions = {
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h
index 7cde72d1b..3e6d5a7cd 100644
--- a/src/yuzu/game_list_p.h
+++ b/src/yuzu/game_list_p.h
@@ -65,10 +65,10 @@ public:
65 */ 65 */
66class GameListItemPath : public GameListItem { 66class GameListItemPath : public GameListItem {
67public: 67public:
68 static const int TitleRole = SortRole; 68 static const int TitleRole = SortRole + 1;
69 static const int FullPathRole = SortRole + 1; 69 static const int FullPathRole = SortRole + 2;
70 static const int ProgramIdRole = SortRole + 2; 70 static const int ProgramIdRole = SortRole + 3;
71 static const int FileTypeRole = SortRole + 3; 71 static const int FileTypeRole = SortRole + 4;
72 72
73 GameListItemPath() = default; 73 GameListItemPath() = default;
74 GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, 74 GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data,
@@ -95,7 +95,7 @@ public:
95 } 95 }
96 96
97 QVariant data(int role) const override { 97 QVariant data(int role) const override {
98 if (role == Qt::DisplayRole) { 98 if (role == Qt::DisplayRole || role == SortRole) {
99 std::string filename; 99 std::string filename;
100 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, 100 Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename,
101 nullptr); 101 nullptr);
@@ -110,6 +110,9 @@ public:
110 const auto& row1 = row_data.at(UISettings::values.row_1_text_id); 110 const auto& row1 = row_data.at(UISettings::values.row_1_text_id);
111 const int row2_id = UISettings::values.row_2_text_id; 111 const int row2_id = UISettings::values.row_2_text_id;
112 112
113 if (role == SortRole)
114 return row1.toLower();
115
113 if (row2_id == 4) // None 116 if (row2_id == 4) // None
114 return row1; 117 return row1;
115 118
@@ -123,6 +126,13 @@ public:
123 126
124 return GameListItem::data(role); 127 return GameListItem::data(role);
125 } 128 }
129
130 /**
131 * Override to prevent automatic sorting.
132 */
133 bool operator<(const QStandardItem& other) const override {
134 return false;
135 }
126}; 136};
127 137
128class GameListItemCompat : public GameListItem { 138class GameListItemCompat : public GameListItem {
@@ -289,6 +299,10 @@ public:
289 int type() const override { 299 int type() const override {
290 return static_cast<int>(GameListItemType::AddDir); 300 return static_cast<int>(GameListItemType::AddDir);
291 } 301 }
302
303 bool operator<(const QStandardItem& other) const override {
304 return false;
305 }
292}; 306};
293 307
294class GameList; 308class GameList;
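Together with the game_list.cpp change above, this routes all ordering through SortRole, a lower-cased copy of the display text, while operator< returns false so QStandardItemModel never reorders rows implicitly; DonePopulating performs the single explicit sort. A minimal sketch of the pattern (the role value here is an assumption; yuzu defines SortRole on its own GameListItem base):

    #include <QStandardItem>
    #include <QStandardItemModel>

    class SortableItem : public QStandardItem {
    public:
        static constexpr int SortRole = Qt::UserRole + 1;

        QVariant data(int role) const override {
            if (role == SortRole) {
                // Case-insensitive sort key derived from the display text.
                return QStandardItem::data(Qt::DisplayRole).toString().toLower();
            }
            return QStandardItem::data(role);
        }

        // Disable implicit ordering; sorting happens only via model->sort().
        bool operator<(const QStandardItem&) const override {
            return false;
        }
    };

    // Usage: model->setSortRole(SortableItem::SortRole);
    //        model->sort(column, order); // explicit, e.g. once populating is done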
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 940f24dc8..1717e06f9 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -205,7 +205,13 @@ GMainWindow::GMainWindow()
205 ConnectMenuEvents(); 205 ConnectMenuEvents();
206 ConnectWidgetEvents(); 206 ConnectWidgetEvents();
207 207
208 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, 208 const auto build_id = std::string(Common::g_build_id);
209 const auto fmt = std::string(Common::g_title_bar_format_idle);
210 const auto yuzu_build_version =
211 fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{},
212 std::string{}, std::string{}, std::string{}, build_id);
213
214 LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch,
209 Common::g_scm_desc); 215 Common::g_scm_desc);
210#ifdef ARCHITECTURE_x86_64 216#ifdef ARCHITECTURE_x86_64
211 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); 217 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 3522dcf6d..411e7e647 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -156,12 +156,6 @@ EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() {
156 SDL_GL_DeleteContext(window_context); 156 SDL_GL_DeleteContext(window_context);
157} 157}
158 158
159void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
160 void* surface) const {
161 // Should not have been called from OpenGL
162 UNREACHABLE();
163}
164
165std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { 159std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const {
166 return std::make_unique<SDLGLContext>(); 160 return std::make_unique<SDLGLContext>();
167} 161}
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
index e092021d7..48bb41683 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h
@@ -15,10 +15,6 @@ public:
15 15
16 void Present() override; 16 void Present() override;
17 17
18 /// Ignored in OpenGL
19 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
20 void* surface) const override;
21
22 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 18 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
23 19
24private: 20private:
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
index 46d053f04..f2990910e 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp
@@ -2,102 +2,62 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <cstdlib>
6#include <memory>
6#include <string> 7#include <string>
7#include <vector> 8
8#include <SDL.h>
9#include <SDL_vulkan.h>
10#include <fmt/format.h> 9#include <fmt/format.h>
11#include <vulkan/vulkan.h> 10
12#include "common/assert.h" 11#include "common/assert.h"
13#include "common/logging/log.h" 12#include "common/logging/log.h"
14#include "common/scm_rev.h" 13#include "common/scm_rev.h"
15#include "core/settings.h" 14#include "core/settings.h"
15#include "video_core/renderer_vulkan/renderer_vulkan.h"
16#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" 16#include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h"
17 17
18// Include these late to avoid polluting everything with Xlib macros
19#include <SDL.h>
20#include <SDL_syswm.h>
21
18EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) 22EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
19 : EmuWindow_SDL2{system, fullscreen} { 23 : EmuWindow_SDL2{system, fullscreen} {
20 if (SDL_Vulkan_LoadLibrary(nullptr) != 0) {
21 LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError());
22 exit(EXIT_FAILURE);
23 }
24
25 vkGetInstanceProcAddr =
26 reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr());
27 if (vkGetInstanceProcAddr == nullptr) {
28 LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
29 exit(EXIT_FAILURE);
30 }
31
32 const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name, 24 const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name,
33 Common::g_scm_branch, Common::g_scm_desc); 25 Common::g_scm_branch, Common::g_scm_desc);
34 render_window = 26 render_window =
35 SDL_CreateWindow(window_title.c_str(), 27 SDL_CreateWindow(window_title.c_str(), SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
36 SDL_WINDOWPOS_UNDEFINED, // x position
37 SDL_WINDOWPOS_UNDEFINED, // y position
38 Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, 28 Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height,
39 SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN); 29 SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI);
40
41 const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr);
42
43 u32 extra_ext_count{};
44 if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) {
45 LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}",
46 SDL_GetError());
47 exit(1);
48 }
49
50 auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count);
51 if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) {
52 LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! {}", SDL_GetError());
53 exit(1);
54 }
55 std::vector<const char*> enabled_extensions;
56 enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(),
57 extra_ext_names.get() + extra_ext_count);
58
59 std::vector<const char*> enabled_layers;
60 if (use_standard_layers) {
61 enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
62 enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation");
63 }
64
65 VkApplicationInfo app_info{};
66 app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
67 app_info.apiVersion = VK_API_VERSION_1_1;
68 app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0);
69 app_info.pApplicationName = "yuzu-emu";
70 app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0);
71 app_info.pEngineName = "yuzu-emu";
72 30
73 VkInstanceCreateInfo instance_ci{}; 31 SDL_SysWMinfo wm;
74 instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; 32 if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) {
75 instance_ci.pApplicationInfo = &app_info; 33 LOG_CRITICAL(Frontend, "Failed to get information from the window manager");
76 instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size()); 34 std::exit(EXIT_FAILURE);
77 instance_ci.ppEnabledExtensionNames = enabled_extensions.data();
78 if (Settings::values.renderer_debug) {
79 instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size());
80 instance_ci.ppEnabledLayerNames = enabled_layers.data();
81 } 35 }
82 36
83 const auto vkCreateInstance = 37 switch (wm.subsystem) {
84 reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance")); 38#ifdef SDL_VIDEO_DRIVER_WINDOWS
85 if (vkCreateInstance == nullptr || 39 case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS:
86 vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) { 40 window_info.type = Core::Frontend::WindowSystemType::Windows;
87 LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!"); 41 window_info.render_surface = reinterpret_cast<void*>(wm.info.win.window);
88 exit(EXIT_FAILURE); 42 break;
89 } 43#endif
90 44#ifdef SDL_VIDEO_DRIVER_X11
91 vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( 45 case SDL_SYSWM_TYPE::SDL_SYSWM_X11:
92 vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance")); 46 window_info.type = Core::Frontend::WindowSystemType::X11;
93 if (vkDestroyInstance == nullptr) { 47 window_info.display_connection = wm.info.x11.display;
94 LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); 48 window_info.render_surface = reinterpret_cast<void*>(wm.info.x11.window);
95 exit(EXIT_FAILURE); 49 break;
96 } 50#endif
97 51#ifdef SDL_VIDEO_DRIVER_WAYLAND
98 if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) { 52 case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND:
99 LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! {}", SDL_GetError()); 53 window_info.type = Core::Frontend::WindowSystemType::Wayland;
100 exit(EXIT_FAILURE); 54 window_info.display_connection = wm.info.wl.display;
55 window_info.render_surface = wm.info.wl.surface;
56 break;
57#endif
58 default:
59 LOG_CRITICAL(Frontend, "Window manager subsystem not implemented");
60 std::exit(EXIT_FAILURE);
101 } 61 }
102 62
103 OnResize(); 63 OnResize();
@@ -107,51 +67,12 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen)
107 Common::g_scm_branch, Common::g_scm_desc); 67 Common::g_scm_branch, Common::g_scm_desc);
108} 68}
109 69
110EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() { 70EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default;
111 vkDestroyInstance(vk_instance, nullptr);
112}
113
114void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
115 void* surface) const {
116 const auto instance_proc_addr = vkGetInstanceProcAddr;
117 std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr));
118 std::memcpy(instance, &vk_instance, sizeof(vk_instance));
119 std::memcpy(surface, &vk_surface, sizeof(vk_surface));
120}
121 71
122std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { 72std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const {
123 return nullptr; 73 return nullptr;
124} 74}
125 75
126bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const {
127 if (!Settings::values.renderer_debug) {
128 return false;
129 }
130
131 const auto vkEnumerateInstanceLayerProperties =
132 reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>(
133 vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties"));
134 if (vkEnumerateInstanceLayerProperties == nullptr) {
135 LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!");
136 return false;
137 }
138
139 u32 available_layers_count{};
140 if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) {
141 LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
142 return false;
143 }
144 std::vector<VkLayerProperties> layers(available_layers_count);
145 if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) {
146 LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!");
147 return false;
148 }
149
150 return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) {
151 return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation");
152 }) != layers.end();
153}
154
155void EmuWindow_SDL2_VK::Present() { 76void EmuWindow_SDL2_VK::Present() {
156 // TODO (bunnei): ImplementMe 77 // TODO (bunnei): ImplementMe
157} 78}
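One subtlety in the SDL2 path above: SDL_GetWindowWMInfo requires the caller to fill in wm.version with the SDL_VERSION macro before the call, otherwise it returns SDL_FALSE. A minimal sketch of the query as SDL2 documents it:

    #include <cstdlib>
    #include <SDL.h>
    #include <SDL_syswm.h>

    // Fetch native window-manager handles from SDL. The version field must
    // be initialized before calling SDL_GetWindowWMInfo, or the call fails.
    SDL_SysWMinfo QueryWMInfo(SDL_Window* window) {
        SDL_SysWMinfo wm{};
        SDL_VERSION(&wm.version);
        if (SDL_GetWindowWMInfo(window, &wm) == SDL_FALSE) {
            SDL_Log("SDL_GetWindowWMInfo failed: %s", SDL_GetError());
            std::exit(EXIT_FAILURE);
        }
        return wm;
    }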
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
index 3dd1f3f61..b8021ebea 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h
@@ -4,27 +4,21 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <vulkan/vulkan.h> 7#include <memory>
8
8#include "core/frontend/emu_window.h" 9#include "core/frontend/emu_window.h"
9#include "yuzu_cmd/emu_window/emu_window_sdl2.h" 10#include "yuzu_cmd/emu_window/emu_window_sdl2.h"
10 11
12namespace Core {
13class System;
14}
15
11class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { 16class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 {
12public: 17public:
13 explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen); 18 explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen);
14 ~EmuWindow_SDL2_VK(); 19 ~EmuWindow_SDL2_VK();
15 20
16 void Present() override; 21 void Present() override;
17 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
18 void* surface) const override;
19 22
20 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 23 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
21
22private:
23 bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const;
24
25 VkInstance vk_instance{};
26 VkSurfaceKHR vk_surface{};
27
28 PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{};
29 PFN_vkDestroyInstance vkDestroyInstance{};
30}; 24};
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
index a837430cc..8584f6671 100644
--- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
+++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp
@@ -116,10 +116,6 @@ bool EmuWindow_SDL2_Hide::IsShown() const {
116 return false; 116 return false;
117} 117}
118 118
119void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const {
120 UNREACHABLE();
121}
122
123class SDLGLContext : public Core::Frontend::GraphicsContext { 119class SDLGLContext : public Core::Frontend::GraphicsContext {
124public: 120public:
125 explicit SDLGLContext() { 121 explicit SDLGLContext() {
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
index 9f5d04fca..c13a82df2 100644
--- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
+++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h
@@ -19,10 +19,6 @@ public:
19 /// Whether the screen is being shown or not. 19 /// Whether the screen is being shown or not.
20 bool IsShown() const override; 20 bool IsShown() const override;
21 21
22 /// Retrieves Vulkan specific handlers from the window
23 void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
24 void* surface) const override;
25
26 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; 22 std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override;
27 23
28private: 24private: