Diffstat (limited to 'src')
134 files changed, 5435 insertions, 3550 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 9d0af02fd..e40e9b0a5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,6 +53,7 @@ if (MSVC)
 else()
     add_compile_options(
         -Wall
+        -Werror=reorder
         -Wno-attributes
     )
 
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index fbebed715..eeceaa655 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -106,6 +106,8 @@ add_library(common STATIC
     common_funcs.h
     common_paths.h
     common_types.h
+    dynamic_library.cpp
+    dynamic_library.h
     file_util.cpp
     file_util.h
     hash.h
diff --git a/src/common/dynamic_library.cpp b/src/common/dynamic_library.cpp
new file mode 100644
index 000000000..7ab54e9e4
--- /dev/null
+++ b/src/common/dynamic_library.cpp
@@ -0,0 +1,106 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#include <cstring>
+#include <string>
+#include <utility>
+
+#include <fmt/format.h>
+
+#include "common/dynamic_library.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+namespace Common {
+
+DynamicLibrary::DynamicLibrary() = default;
+
+DynamicLibrary::DynamicLibrary(const char* filename) {
+    Open(filename);
+}
+
+DynamicLibrary::DynamicLibrary(DynamicLibrary&& rhs) noexcept
+    : handle{std::exchange(rhs.handle, nullptr)} {}
+
+DynamicLibrary& DynamicLibrary::operator=(DynamicLibrary&& rhs) noexcept {
+    Close();
+    handle = std::exchange(rhs.handle, nullptr);
+    return *this;
+}
+
+DynamicLibrary::~DynamicLibrary() {
+    Close();
+}
+
+std::string DynamicLibrary::GetUnprefixedFilename(const char* filename) {
+#if defined(_WIN32)
+    return std::string(filename) + ".dll";
+#elif defined(__APPLE__)
+    return std::string(filename) + ".dylib";
+#else
+    return std::string(filename) + ".so";
+#endif
+}
+
+std::string DynamicLibrary::GetVersionedFilename(const char* libname, int major, int minor) {
+#if defined(_WIN32)
+    if (major >= 0 && minor >= 0)
+        return fmt::format("{}-{}-{}.dll", libname, major, minor);
+    else if (major >= 0)
+        return fmt::format("{}-{}.dll", libname, major);
+    else
+        return fmt::format("{}.dll", libname);
+#elif defined(__APPLE__)
+    const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
+    if (major >= 0 && minor >= 0)
+        return fmt::format("{}{}.{}.{}.dylib", prefix, libname, major, minor);
+    else if (major >= 0)
+        return fmt::format("{}{}.{}.dylib", prefix, libname, major);
+    else
+        return fmt::format("{}{}.dylib", prefix, libname);
+#else
+    const char* prefix = std::strncmp(libname, "lib", 3) ? "lib" : "";
+    if (major >= 0 && minor >= 0)
+        return fmt::format("{}{}.so.{}.{}", prefix, libname, major, minor);
+    else if (major >= 0)
+        return fmt::format("{}{}.so.{}", prefix, libname, major);
+    else
+        return fmt::format("{}{}.so", prefix, libname);
+#endif
+}
+
+bool DynamicLibrary::Open(const char* filename) {
+#ifdef _WIN32
+    handle = reinterpret_cast<void*>(LoadLibraryA(filename));
+#else
+    handle = dlopen(filename, RTLD_NOW);
+#endif
+    return handle != nullptr;
+}
+
+void DynamicLibrary::Close() {
+    if (!IsOpen())
+        return;
+
+#ifdef _WIN32
+    FreeLibrary(reinterpret_cast<HMODULE>(handle));
+#else
+    dlclose(handle);
+#endif
+    handle = nullptr;
+}
+
+void* DynamicLibrary::GetSymbolAddress(const char* name) const {
+#ifdef _WIN32
+    return reinterpret_cast<void*>(GetProcAddress(reinterpret_cast<HMODULE>(handle), name));
+#else
+    return reinterpret_cast<void*>(dlsym(handle, name));
+#endif
+}
+
+} // namespace Common
diff --git a/src/common/dynamic_library.h b/src/common/dynamic_library.h
new file mode 100644
index 000000000..2a06372fd
--- /dev/null
+++ b/src/common/dynamic_library.h
@@ -0,0 +1,75 @@
+// Copyright 2019 Dolphin Emulator Project
+// Licensed under GPLv2+
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+
+namespace Common {
+
+/**
+ * Provides a platform-independent interface for loading a dynamic library and retrieving symbols.
+ * The handle is owned by a single instance: the class is move-only, and the library is closed
+ * when the owning instance is destroyed.
+ */
+class DynamicLibrary final {
+public:
+    /// Default constructor, does not load a library.
+    explicit DynamicLibrary();
+
+    /// Automatically loads the specified library. Call IsOpen() to check validity before use.
+    explicit DynamicLibrary(const char* filename);
+
+    /// Moves the library.
+    DynamicLibrary(DynamicLibrary&&) noexcept;
+    DynamicLibrary& operator=(DynamicLibrary&&) noexcept;
+
+    /// Delete copies, we can't copy a dynamic library.
+    DynamicLibrary(const DynamicLibrary&) = delete;
+    DynamicLibrary& operator=(const DynamicLibrary&) = delete;
+
+    /// Closes the library.
+    ~DynamicLibrary();
+
+    /// Returns the specified library name with the platform-specific suffix added.
+    static std::string GetUnprefixedFilename(const char* filename);
+
+    /// Returns the specified library name in platform-specific format.
+    /// Major/minor versions will not be included if set to -1.
+    /// If libname already contains the "lib" prefix, it will not be added again.
+    /// Windows: LIBNAME-MAJOR-MINOR.dll
+    /// Linux: libLIBNAME.so.MAJOR.MINOR
+    /// Mac: libLIBNAME.MAJOR.MINOR.dylib
+    static std::string GetVersionedFilename(const char* libname, int major = -1, int minor = -1);
+
+    /// Returns true if a module is loaded, otherwise false.
+    bool IsOpen() const {
+        return handle != nullptr;
+    }
+
+    /// Loads (or replaces) the handle with the specified library file name.
+    /// Returns true if the library was loaded and can be used.
+    bool Open(const char* filename);
+
+    /// Unloads the library, any function pointers from this library are no longer valid.
+    void Close();
+
+    /// Returns the address of the specified symbol (function or variable) as an untyped pointer.
+    /// If the specified symbol does not exist in this library, nullptr is returned.
+    void* GetSymbolAddress(const char* name) const;
+
+    /// Obtains the address of the specified symbol, automatically casting to the correct type.
+    /// Returns true if the symbol was found and assigned, otherwise false.
+    template <typename T>
+    bool GetSymbol(const char* name, T* ptr) const {
+        *ptr = reinterpret_cast<T>(GetSymbolAddress(name));
+        return *ptr != nullptr;
+    }
+
+private:
+    /// Platform-dependent data type representing a dynamic library handle.
+    void* handle = nullptr;
+};
+
+} // namespace Common
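A minimal usage sketch of the interface added above. The library and symbol names are illustrative only; per GetVersionedFilename above, "vulkan" with major=1 resolves to "libvulkan.so.1" on Linux:

    // Hypothetical caller code, not part of this commit.
    Common::DynamicLibrary library;
    const std::string name = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1);
    if (!library.Open(name.c_str())) {
        // Library missing or failed to load; IsOpen() remains false.
    }

    using PFN_Example = void (*)();
    PFN_Example example_fn{};
    if (!library.GetSymbol("vkGetInstanceProcAddr", &example_fn)) {
        // Symbol not exported: example_fn was assigned nullptr.
    }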
diff --git a/src/common/file_util.cpp b/src/common/file_util.cpp
index 41167f57a..35eee0096 100644
--- a/src/common/file_util.cpp
+++ b/src/common/file_util.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <array>
+#include <limits>
 #include <memory>
 #include <sstream>
 #include <unordered_map>
@@ -530,11 +531,11 @@ void CopyDir(const std::string& source_path, const std::string& dest_path) {
 std::optional<std::string> GetCurrentDir() {
     // Get the current working directory (getcwd uses malloc)
 #ifdef _WIN32
-    wchar_t* dir;
-    if (!(dir = _wgetcwd(nullptr, 0))) {
+    wchar_t* dir = _wgetcwd(nullptr, 0);
+    if (!dir) {
 #else
-    char* dir;
-    if (!(dir = getcwd(nullptr, 0))) {
+    char* dir = getcwd(nullptr, 0);
+    if (!dir) {
 #endif
         LOG_ERROR(Common_Filesystem, "GetCurrentDirectory failed: {}", GetLastErrorMsg());
         return {};
@@ -918,19 +919,22 @@ void IOFile::Swap(IOFile& other) noexcept {
 
 bool IOFile::Open(const std::string& filename, const char openmode[], int flags) {
     Close();
+    bool m_good;
 #ifdef _WIN32
     if (flags != 0) {
         m_file = _wfsopen(Common::UTF8ToUTF16W(filename).c_str(),
                           Common::UTF8ToUTF16W(openmode).c_str(), flags);
+        m_good = m_file != nullptr;
     } else {
-        _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
-                  Common::UTF8ToUTF16W(openmode).c_str());
+        m_good = _wfopen_s(&m_file, Common::UTF8ToUTF16W(filename).c_str(),
+                           Common::UTF8ToUTF16W(openmode).c_str()) == 0;
     }
 #else
-    m_file = fopen(filename.c_str(), openmode);
+    m_file = std::fopen(filename.c_str(), openmode);
+    m_good = m_file != nullptr;
 #endif
 
-    return IsOpen();
+    return m_good;
 }
 
 bool IOFile::Close() {
@@ -956,7 +960,7 @@ u64 IOFile::Tell() const {
     if (IsOpen())
        return ftello(m_file);
 
-    return -1;
+    return std::numeric_limits<u64>::max();
 }
 
 bool IOFile::Flush() {
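With Tell() now reporting failure as std::numeric_limits<u64>::max() rather than a sign-converted -1, an explicit caller-side check might look like this sketch (the IOFile usage and `path` variable are assumed from the surrounding API):

    FileUtil::IOFile file(path, "rb");
    const u64 offset = file.Tell();
    if (offset == std::numeric_limits<u64>::max()) {
        // File not open: the returned value is a sentinel, not a position.
    }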
diff --git a/src/common/thread.cpp b/src/common/thread.cpp
index fe7a420cc..0cd2d10bf 100644
--- a/src/common/thread.cpp
+++ b/src/common/thread.cpp
@@ -28,11 +28,8 @@ namespace Common {
 #ifdef _MSC_VER
 
 // Sets the debugger-visible name of the current thread.
-// Uses undocumented (actually, it is now documented) trick.
-// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vsdebug/html/vxtsksettingthreadname.asp
-
-// This is implemented much nicer in upcoming msvc++, see:
-// http://msdn.microsoft.com/en-us/library/xcb2z8hs(VS.100).aspx
+// Uses trick documented in:
+// https://docs.microsoft.com/en-us/visualstudio/debugger/how-to-set-a-thread-name-in-native-code
 void SetCurrentThreadName(const char* name) {
     static const DWORD MS_VC_EXCEPTION = 0x406D1388;
 
@@ -47,7 +44,7 @@ void SetCurrentThreadName(const char* name) {
 
     info.dwType = 0x1000;
     info.szName = name;
-    info.dwThreadID = -1; // dwThreadID;
+    info.dwThreadID = std::numeric_limits<DWORD>::max();
     info.dwFlags = 0;
 
     __try {
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index e226e9711..e77e82b8d 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -348,6 +348,12 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
         if (ext_dir != nullptr)
             layers_ext.push_back(std::move(ext_dir));
     }
+
+    // When there are no layers to apply, return early as there is no need to rebuild the RomFS
+    if (layers.empty() && layers_ext.empty()) {
+        return;
+    }
+
     layers.push_back(std::move(extracted));
 
     auto layered = LayeredVfsDirectory::MakeLayeredDirectory(std::move(layers));
diff --git a/src/core/file_sys/romfs.cpp b/src/core/file_sys/romfs.cpp
index c909d1ce4..120032134 100644
--- a/src/core/file_sys/romfs.cpp
+++ b/src/core/file_sys/romfs.cpp
@@ -5,6 +5,7 @@
 #include <memory>
 
 #include "common/common_types.h"
+#include "common/string_util.h"
 #include "common/swap.h"
 #include "core/file_sys/fsmitm_romfsbuild.h"
 #include "core/file_sys/romfs.h"
@@ -126,7 +127,7 @@ VirtualDir ExtractRomFS(VirtualFile file, RomFSExtractionType type) {
         return out->GetSubdirectories().front();
 
     while (out->GetSubdirectories().size() == 1 && out->GetFiles().empty()) {
-        if (out->GetSubdirectories().front()->GetName() == "data" &&
+        if (Common::ToLower(out->GetSubdirectories().front()->GetName()) == "data" &&
             type == RomFSExtractionType::Truncated)
             break;
         out = out->GetSubdirectories().front();
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 72294d4d8..13aa14934 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -12,6 +12,15 @@
 
 namespace Core::Frontend {
 
+/// Information for the Graphics Backends signifying what type of screen pointer is in
+/// WindowInformation
+enum class WindowSystemType {
+    Headless,
+    Windows,
+    X11,
+    Wayland,
+};
+
 /**
  * Represents a drawing context that supports graphics operations.
  */
@@ -76,6 +85,23 @@ public:
         std::pair<unsigned, unsigned> min_client_area_size;
     };
 
+    /// Data describing host window system information
+    struct WindowSystemInfo {
+        // Window system type. Determines which GL context or Vulkan WSI is used.
+        WindowSystemType type = WindowSystemType::Headless;
+
+        // Connection to a display server. This is used on X11 and Wayland platforms.
+        void* display_connection = nullptr;
+
+        // Render surface. This is a pointer to the native window handle, which depends
+        // on the platform. e.g. HWND for Windows, Window for X11. If the surface is
+        // set to nullptr, the video backend will run in headless mode.
+        void* render_surface = nullptr;
+
+        // Scale of the render surface. For hidpi systems, this will be >1.
+        float render_surface_scale = 1.0f;
+    };
+
     /// Polls window events
     virtual void PollEvents() = 0;
 
@@ -87,10 +113,6 @@ public:
     /// Returns if window is shown (not minimized)
     virtual bool IsShown() const = 0;
 
-    /// Retrieves Vulkan specific handlers from the window
-    virtual void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance,
-                                        void* surface) const = 0;
-
     /**
      * Signal that a touch pressed event has occurred (e.g. mouse click pressed)
     * @param framebuffer_x Framebuffer x-coordinate that was pressed
@@ -128,6 +150,13 @@ public:
     }
 
     /**
+     * Returns system information about the drawing area.
+     */
+    const WindowSystemInfo& GetWindowInfo() const {
+        return window_info;
+    }
+
+    /**
      * Gets the framebuffer layout (width, height, and screen regions)
      * @note This method is thread-safe
      */
@@ -142,7 +171,7 @@ public:
     void UpdateCurrentFramebufferLayout(unsigned width, unsigned height);
 
 protected:
-    EmuWindow();
+    explicit EmuWindow();
     virtual ~EmuWindow();
 
     /**
@@ -179,6 +208,8 @@ protected:
         client_area_height = size.second;
     }
 
+    WindowSystemInfo window_info;
+
 private:
     /**
      * Handler called when the minimal client area was requested to be changed via SetConfig.
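A sketch of how a frontend might populate the new WindowSystemInfo; the subclass and native handle sources are hypothetical, and the remaining pure-virtual members of EmuWindow are elided:

    class ExampleWindow final : public Core::Frontend::EmuWindow {
    public:
        ExampleWindow(void* native_display, void* native_window) {
            // window_info is the protected member added above.
            window_info.type = Core::Frontend::WindowSystemType::X11;
            window_info.display_connection = native_display;
            window_info.render_surface = native_window; // nullptr => headless mode
            window_info.render_surface_scale = 1.0f;    // >1 on hidpi displays
        }
        // PollEvents(), IsShown(), etc. omitted for brevity.
    };

The video backend can then branch on GetWindowInfo().type to pick the matching GL context or Vulkan surface extension, replacing the removed RetrieveVulkanHandlers() hook.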
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index e47f1deed..014d647cf 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -103,7 +103,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_
 
 struct KernelCore::Impl {
     explicit Impl(Core::System& system, KernelCore& kernel)
-        : system{system}, global_scheduler{kernel}, synchronization{system}, time_manager{system} {}
+        : global_scheduler{kernel}, synchronization{system}, time_manager{system}, system{system} {}
 
     void Initialize(KernelCore& kernel) {
         Shutdown();
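This reordering is what the new -Werror=reorder flag (first hunk of this diff) enforces: members are constructed in declaration order regardless of how the initializer list is written, so a mismatched list can silently read uninitialized state. A minimal illustration with a hypothetical class:

    struct Example {
        int first;  // declared first, therefore constructed first
        int second;

        // -Wreorder fires here: 'second' is listed first, but 'first' is
        // still initialized first and reads 'second' before it has a value.
        Example() : second{1}, first{second + 1} {}
    };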
diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp
index 6aadb3ea8..7938b4b80 100644
--- a/src/core/hle/service/friend/friend.cpp
+++ b/src/core/hle/service/friend/friend.cpp
@@ -27,7 +27,7 @@ public:
             {10110, nullptr, "GetFriendProfileImage"},
             {10200, nullptr, "SendFriendRequestForApplication"},
             {10211, nullptr, "AddFacedFriendRequestForApplication"},
-            {10400, nullptr, "GetBlockedUserListIds"},
+            {10400, &IFriendService::GetBlockedUserListIds, "GetBlockedUserListIds"},
             {10500, nullptr, "GetProfileList"},
             {10600, nullptr, "DeclareOpenOnlinePlaySession"},
             {10601, &IFriendService::DeclareCloseOnlinePlaySession, "DeclareCloseOnlinePlaySession"},
@@ -121,6 +121,15 @@ private:
     };
     static_assert(sizeof(SizedFriendFilter) == 0x10, "SizedFriendFilter is an invalid size");
 
+    void GetBlockedUserListIds(Kernel::HLERequestContext& ctx) {
+        // This is safe to stub, as there should be no adverse consequences from reporting no
+        // blocked users.
+        LOG_WARNING(Service_ACC, "(STUBBED) called");
+        IPC::ResponseBuilder rb{ctx, 3};
+        rb.Push(RESULT_SUCCESS);
+        rb.Push<u32>(0); // Indicates there are no blocked users
+    }
+
     void DeclareCloseOnlinePlaySession(Kernel::HLERequestContext& ctx) {
         // Stub used by Splatoon 2
         LOG_WARNING(Service_ACC, "(STUBBED) called");
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 32b6f4b27..f1e3d832a 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -28,6 +28,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
     buffer.slot = slot;
     buffer.igbp_buffer = igbp_buffer;
     buffer.status = Buffer::Status::Free;
+    free_buffers.push_back(slot);
 
     queue.emplace_back(buffer);
     buffer_wait_event.writable->Signal();
@@ -35,16 +36,37 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
 
 std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
                                                                                        u32 height) {
-    auto itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
-        // Only consider free buffers. Buffers become free once again after they've been Acquired
-        // and Released by the compositor, see the NVFlinger::Compose method.
-        if (buffer.status != Buffer::Status::Free) {
-            return false;
-        }
 
-        // Make sure that the parameters match.
-        return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
-    });
+    if (free_buffers.empty()) {
+        return {};
+    }
+
+    auto f_itr = free_buffers.begin();
+    auto itr = queue.end();
+
+    while (f_itr != free_buffers.end()) {
+        auto slot = *f_itr;
+        itr = std::find_if(queue.begin(), queue.end(), [&](const Buffer& buffer) {
+            // Only consider free buffers. Buffers become free once again after they've been
+            // Acquired and Released by the compositor, see the NVFlinger::Compose method.
+            if (buffer.status != Buffer::Status::Free) {
+                return false;
+            }
+
+            if (buffer.slot != slot) {
+                return false;
+            }
+
+            // Make sure that the parameters match.
+            return buffer.igbp_buffer.width == width && buffer.igbp_buffer.height == height;
+        });
+
+        if (itr != queue.end()) {
+            free_buffers.erase(f_itr);
+            break;
+        }
+        ++f_itr;
+    }
 
     if (itr == queue.end()) {
         return {};
@@ -99,10 +121,18 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
     ASSERT(itr != queue.end());
     ASSERT(itr->status == Buffer::Status::Acquired);
     itr->status = Buffer::Status::Free;
+    free_buffers.push_back(slot);
 
     buffer_wait_event.writable->Signal();
 }
 
+void BufferQueue::Disconnect() {
+    queue.clear();
+    queue_sequence.clear();
+    id = 1;
+    layer_id = 1;
+}
+
 u32 BufferQueue::Query(QueryType type) {
     LOG_WARNING(Service, "(STUBBED) called type={}", static_cast<u32>(type));
 
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index f4bbfd945..d5f31e567 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -87,6 +87,7 @@ public:
                      Service::Nvidia::MultiFence& multi_fence);
     std::optional<std::reference_wrapper<const Buffer>> AcquireBuffer();
     void ReleaseBuffer(u32 slot);
+    void Disconnect();
     u32 Query(QueryType type);
 
     u32 GetId() const {
@@ -101,6 +102,7 @@ private:
     u32 id;
     u64 layer_id;
 
+    std::list<u32> free_buffers;
     std::vector<Buffer> queue;
     std::list<u32> queue_sequence;
     Kernel::EventPair buffer_wait_event;
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp
index 519da74e0..fdc62d05b 100644
--- a/src/core/hle/service/vi/vi.cpp
+++ b/src/core/hle/service/vi/vi.cpp
@@ -513,7 +513,8 @@ private:
 
         auto& buffer_queue = nv_flinger->FindBufferQueue(id);
 
-        if (transaction == TransactionId::Connect) {
+        switch (transaction) {
+        case TransactionId::Connect: {
             IGBPConnectRequestParcel request{ctx.ReadBuffer()};
             IGBPConnectResponseParcel response{
                 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedWidth) *
@@ -521,14 +522,18 @@ private:
                 static_cast<u32>(static_cast<u32>(DisplayResolution::UndockedHeight) *
                                  Settings::values.resolution_factor)};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::SetPreallocatedBuffer) {
+            break;
+        }
+        case TransactionId::SetPreallocatedBuffer: {
             IGBPSetPreallocatedBufferRequestParcel request{ctx.ReadBuffer()};
 
             buffer_queue.SetPreallocatedBuffer(request.data.slot, request.buffer);
 
             IGBPSetPreallocatedBufferResponseParcel response{};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::DequeueBuffer) {
+            break;
+        }
+        case TransactionId::DequeueBuffer: {
             IGBPDequeueBufferRequestParcel request{ctx.ReadBuffer()};
             const u32 width{request.data.width};
             const u32 height{request.data.height};
@@ -556,14 +561,18 @@ private:
                 },
                 buffer_queue.GetWritableBufferWaitEvent());
             }
-        } else if (transaction == TransactionId::RequestBuffer) {
+            break;
+        }
+        case TransactionId::RequestBuffer: {
             IGBPRequestBufferRequestParcel request{ctx.ReadBuffer()};
 
             auto& buffer = buffer_queue.RequestBuffer(request.slot);
 
             IGBPRequestBufferResponseParcel response{buffer};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::QueueBuffer) {
+            break;
+        }
+        case TransactionId::QueueBuffer: {
             IGBPQueueBufferRequestParcel request{ctx.ReadBuffer()};
 
             buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
@@ -572,7 +581,9 @@ private:
 
             IGBPQueueBufferResponseParcel response{1280, 720};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::Query) {
+            break;
+        }
+        case TransactionId::Query: {
             IGBPQueryRequestParcel request{ctx.ReadBuffer()};
 
             const u32 value =
@@ -580,15 +591,30 @@ private:
 
             IGBPQueryResponseParcel response{value};
             ctx.WriteBuffer(response.Serialize());
-        } else if (transaction == TransactionId::CancelBuffer) {
+            break;
+        }
+        case TransactionId::CancelBuffer: {
             LOG_CRITICAL(Service_VI, "(STUBBED) called, transaction=CancelBuffer");
-        } else if (transaction == TransactionId::Disconnect ||
-                   transaction == TransactionId::DetachBuffer) {
+            break;
+        }
+        case TransactionId::Disconnect: {
+            LOG_WARNING(Service_VI, "(STUBBED) called, transaction=Disconnect");
+            const auto buffer = ctx.ReadBuffer();
+
+            buffer_queue.Disconnect();
+
+            IGBPEmptyResponseParcel response{};
+            ctx.WriteBuffer(response.Serialize());
+            break;
+        }
+        case TransactionId::DetachBuffer: {
             const auto buffer = ctx.ReadBuffer();
 
             IGBPEmptyResponseParcel response{};
             ctx.WriteBuffer(response.Serialize());
-        } else {
+            break;
+        }
+        default:
             ASSERT_MSG(false, "Unimplemented");
         }
 
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f0888327f..6061d37ae 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -242,7 +242,52 @@ struct Memory::Impl {
         }
         case Common::PageType::RasterizerCachedMemory: {
             const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-            system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            system.GPU().FlushRegion(current_vaddr, copy_amount);
+            std::memcpy(dest_buffer, host_ptr, copy_amount);
+            break;
+        }
+        default:
+            UNREACHABLE();
+        }
+
+            page_index++;
+            page_offset = 0;
+            dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+            remaining_size -= copy_amount;
+        }
+    }
+
+    void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+                         const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+
+        std::size_t remaining_size = size;
+        std::size_t page_index = src_addr >> PAGE_BITS;
+        std::size_t page_offset = src_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, src_addr, size);
+                std::memset(dest_buffer, 0, copy_amount);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                const u8* const src_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_buffer, src_ptr, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
             std::memcpy(dest_buffer, host_ptr, copy_amount);
             break;
         }
@@ -261,6 +306,10 @@ struct Memory::Impl {
         ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
     }
 
+    void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+        ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
+    }
+
     void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
                     const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
@@ -290,7 +339,50 @@ struct Memory::Impl {
         }
         case Common::PageType::RasterizerCachedMemory: {
             u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            system.GPU().InvalidateRegion(current_vaddr, copy_amount);
+            std::memcpy(host_ptr, src_buffer, copy_amount);
+            break;
+        }
+        default:
+            UNREACHABLE();
+        }
+
+            page_index++;
+            page_offset = 0;
+            src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+            remaining_size -= copy_amount;
+        }
+    }
+
+    void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
+                          const void* src_buffer, const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+        std::size_t remaining_size = size;
+        std::size_t page_index = dest_addr >> PAGE_BITS;
+        std::size_t page_offset = dest_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, dest_addr, size);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                u8* const dest_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_ptr, src_buffer, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
             std::memcpy(host_ptr, src_buffer, copy_amount);
             break;
         }
@@ -309,6 +401,10 @@ struct Memory::Impl {
         WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
     }
 
+    void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
+        WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
+    }
+
     void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
         std::size_t remaining_size = size;
@@ -337,7 +433,7 @@ struct Memory::Impl {
         }
         case Common::PageType::RasterizerCachedMemory: {
             u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+            system.GPU().InvalidateRegion(current_vaddr, copy_amount);
             std::memset(host_ptr, 0, copy_amount);
             break;
         }
@@ -384,7 +480,7 @@ struct Memory::Impl {
         }
         case Common::PageType::RasterizerCachedMemory: {
             const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-            system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+            system.GPU().FlushRegion(current_vaddr, copy_amount);
             WriteBlock(process, dest_addr, host_ptr, copy_amount);
             break;
         }
@@ -545,7 +641,7 @@ struct Memory::Impl {
             break;
         case Common::PageType::RasterizerCachedMemory: {
             const u8* const host_ptr = GetPointerFromVMA(vaddr);
-            system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
+            system.GPU().FlushRegion(vaddr, sizeof(T));
             T value;
             std::memcpy(&value, host_ptr, sizeof(T));
             return value;
@@ -587,7 +683,7 @@ struct Memory::Impl {
             break;
         case Common::PageType::RasterizerCachedMemory: {
             u8* const host_ptr{GetPointerFromVMA(vaddr)};
-            system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+            system.GPU().InvalidateRegion(vaddr, sizeof(T));
             std::memcpy(host_ptr, &data, sizeof(T));
             break;
         }
@@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
     impl->ReadBlock(src_addr, dest_buffer, size);
 }
 
+void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
+                             void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
+}
+
+void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
+}
+
 void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
                         std::size_t size) {
     impl->WriteBlock(process, dest_addr, src_buffer, size);
@@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
     impl->WriteBlock(dest_addr, src_buffer, size);
 }
 
+void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
+                              const void* src_buffer, std::size_t size) {
+    impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
+}
+
+void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
+                              const std::size_t size) {
+    impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
+}
+
 void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
     impl->ZeroBlock(process, dest_addr, size);
 }
diff --git a/src/core/memory.h b/src/core/memory.h
index 8913a9da4..b92d678a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -295,6 +295,27 @@ public:
                    std::size_t size);
 
     /**
+     * Reads a contiguous block of bytes from a specified process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param process     The process to read the data from.
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       process' address space.
+     */
+    void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
+                         std::size_t size);
+
+    /**
      * Reads a contiguous block of bytes from the current process' address space.
     *
      * @param src_addr    The virtual address to begin reading from.
@@ -313,6 +334,25 @@ public:
     void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
 
     /**
+     * Reads a contiguous block of bytes from the current process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       current process' address space.
+     */
+    void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
+
+    /**
      * Writes a range of bytes into a given process' address space at the specified
      * virtual address.
      *
@@ -336,6 +376,26 @@ public:
                     std::size_t size);
 
     /**
+     * Writes a range of bytes into a given process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU Memory.
+     *
+     * @param process    The process to write data into the address space of.
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     *
+     */
+    void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
+                          std::size_t size);
+
+    /**
      * Writes a range of bytes into the current process' address space at the specified
      * virtual address.
      *
@@ -357,6 +417,24 @@ public:
     void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
 
     /**
+     * Writes a range of bytes into the current process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU Memory.
+     *
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the current process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the current process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     *
+     */
+    void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
+
+    /**
      * Fills the specified address range within a process' address space with zeroes.
      *
      * @param process   The process that will have a portion of its memory zeroed out.
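A short contrast of the two read paths as a hedged sketch (the Memory instance, address, and calling context are assumed): ReadBlock flushes GPU-cached pages before copying, while the Unsafe variant skips that synchronization and is only appropriate when the range cannot hold stale GPU writes:

    std::array<u8, 0x100> buffer{};

    // Safe: RasterizerCachedMemory pages are flushed before the copy.
    memory.ReadBlock(src_addr, buffer.data(), buffer.size());

    // Fast: no GPU flush; fine for CPU-only data, wrong for memory the GPU
    // may have written since the last flush.
    memory.ReadBlockUnsafe(src_addr, buffer.data(), buffer.size());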
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index effe76a63..258d58eba 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -148,6 +148,7 @@ add_library(video_core STATIC
     textures/convert.h
     textures/decoders.cpp
     textures/decoders.h
+    textures/texture.cpp
     textures/texture.h
     video_core.cpp
     video_core.h
@@ -155,7 +156,6 @@ add_library(video_core STATIC
 
 if (ENABLE_VULKAN)
     target_sources(video_core PRIVATE
-        renderer_vulkan/declarations.h
         renderer_vulkan/fixed_pipeline_state.cpp
         renderer_vulkan/fixed_pipeline_state.h
         renderer_vulkan/maxwell_to_vk.cpp
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index 4b9193182..e35ee0b67 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,37 +15,29 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
-        return cache_addr <= other_start && other_end <= cache_addr_end;
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+        return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }
 
-    u8* GetWritableHostPtr() const {
-        return FromCacheAddr(cache_addr);
+    std::size_t GetOffset(const VAddr in_addr) {
+        return static_cast<std::size_t>(in_addr - cpu_addr);
     }
 
-    u8* GetWritableHostPtr(std::size_t offset) const {
-        return FromCacheAddr(cache_addr + offset);
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
     }
 
-    std::size_t GetOffset(const CacheAddr in_addr) {
-        return static_cast<std::size_t>(in_addr - cache_addr);
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
     }
 
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
-    }
-
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
-    }
-
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + size;
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + size;
     }
 
     std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
     }
 
 protected:
-    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
-        SetCacheAddr(cache_addr);
+    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
+        SetCpuAddr(cpu_addr);
     }
     ~BufferBlock() = default;
 
 private:
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
+    VAddr cpu_addr{};
+    VAddr cpu_addr_end{};
     std::size_t size{};
     u64 epoch{};
 };
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 186aca61d..b57c0d4d4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include "common/alignment.h" | 19 | #include "common/alignment.h" |
| 20 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 21 | #include "core/core.h" | 21 | #include "core/core.h" |
| 22 | #include "core/memory.h" | ||
| 22 | #include "video_core/buffer_cache/buffer_block.h" | 23 | #include "video_core/buffer_cache/buffer_block.h" |
| 23 | #include "video_core/buffer_cache/map_interval.h" | 24 | #include "video_core/buffer_cache/map_interval.h" |
| 24 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| @@ -37,28 +38,45 @@ public: | |||
| 37 | bool is_written = false, bool use_fast_cbuf = false) { | 38 | bool is_written = false, bool use_fast_cbuf = false) { |
| 38 | std::lock_guard lock{mutex}; | 39 | std::lock_guard lock{mutex}; |
| 39 | 40 | ||
| 40 | auto& memory_manager = system.GPU().MemoryManager(); | 41 | const std::optional<VAddr> cpu_addr_opt = |
| 41 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 42 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 42 | if (!host_ptr) { | 43 | |
| 44 | if (!cpu_addr_opt) { | ||
| 43 | return {GetEmptyBuffer(size), 0}; | 45 | return {GetEmptyBuffer(size), 0}; |
| 44 | } | 46 | } |
| 45 | const auto cache_addr = ToCacheAddr(host_ptr); | 47 | |
| 48 | VAddr cpu_addr = *cpu_addr_opt; | ||
| 46 | 49 | ||
| 47 | // Cache management is a big overhead, so only cache entries above a given size. | 50 | // Cache management is a big overhead, so only cache entries above a given size. |
| 48 | // TODO: Figure out which size works best for a given game. | 51 | // TODO: Figure out which size works best for a given game. |
| 49 | constexpr std::size_t max_stream_size = 0x800; | 52 | constexpr std::size_t max_stream_size = 0x800; |
| 50 | if (use_fast_cbuf || size < max_stream_size) { | 53 | if (use_fast_cbuf || size < max_stream_size) { |
| 51 | if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { | 54 | if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { |
| 55 | auto& memory_manager = system.GPU().MemoryManager(); | ||
| 52 | if (use_fast_cbuf) { | 56 | if (use_fast_cbuf) { |
| 53 | return ConstBufferUpload(host_ptr, size); | 57 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 58 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 59 | return ConstBufferUpload(host_ptr, size); | ||
| 60 | } else { | ||
| 61 | staging_buffer.resize(size); | ||
| 62 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | ||
| 63 | return ConstBufferUpload(staging_buffer.data(), size); | ||
| 64 | } | ||
| 54 | } else { | 65 | } else { |
| 55 | return StreamBufferUpload(host_ptr, size, alignment); | 66 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 67 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 68 | return StreamBufferUpload(host_ptr, size, alignment); | ||
| 69 | } else { | ||
| 70 | staging_buffer.resize(size); | ||
| 71 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | ||
| 72 | return StreamBufferUpload(staging_buffer.data(), size, alignment); | ||
| 73 | } | ||
| 56 | } | 74 | } |
| 57 | } | 75 | } |
| 58 | } | 76 | } |
| 59 | 77 | ||
| 60 | auto block = GetBlock(cache_addr, size); | 78 | auto block = GetBlock(cpu_addr, size); |
| 61 | auto map = MapAddress(block, gpu_addr, cache_addr, size); | 79 | auto map = MapAddress(block, gpu_addr, cpu_addr, size); |
| 62 | if (is_written) { | 80 | if (is_written) { |
| 63 | map->MarkAsModified(true, GetModifiedTicks()); | 81 | map->MarkAsModified(true, GetModifiedTicks()); |
| 64 | if (!map->IsWritten()) { | 82 | if (!map->IsWritten()) { |
| @@ -71,7 +89,7 @@ public: | |||
| 71 | } | 89 | } |
| 72 | } | 90 | } |
| 73 | 91 | ||
| 74 | const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); | 92 | const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr)); |
| 75 | 93 | ||
| 76 | return {ToHandle(block), offset}; | 94 | return {ToHandle(block), offset}; |
| 77 | } | 95 | } |
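
The rewritten TryGetBuffer path above chooses its upload source with IsGranularRange: a direct host pointer when the GPU range is one contiguous host mapping, otherwise a linearizing copy through the reusable staging buffer. A self-contained sketch of that selection; ToyMemoryManager is a made-up stand-in, not the emulator's MemoryManager:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    using GPUVAddr = std::uint64_t;
    using u8 = std::uint8_t;

    // Toy memory manager backed by one flat allocation; IsGranularRange is
    // forced to false so the example exercises the staging-buffer path.
    struct ToyMemoryManager {
        std::vector<u8> backing = std::vector<u8>(0x1000, 0xAB);

        bool IsGranularRange(GPUVAddr, std::size_t) const {
            return false; // pretend the range crosses host mappings
        }
        u8* GetPointer(GPUVAddr addr) {
            return backing.data() + addr;
        }
        void ReadBlockUnsafe(GPUVAddr addr, void* dest, std::size_t size) {
            std::memcpy(dest, backing.data() + addr, size);
        }
    };

    // Pick the upload source: a direct host pointer when the GPU range is one
    // contiguous host mapping, otherwise a linearized copy in the staging buffer.
    const u8* GetUploadSource(ToyMemoryManager& mm, GPUVAddr gpu_addr, std::size_t size,
                              std::vector<u8>& staging_buffer) {
        if (mm.IsGranularRange(gpu_addr, size)) {
            return mm.GetPointer(gpu_addr);
        }
        staging_buffer.resize(size);
        mm.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
        return staging_buffer.data();
    }

    int main() {
        ToyMemoryManager mm;
        std::vector<u8> staging;
        const u8* src = GetUploadSource(mm, 0x100, 0x40, staging);
        assert(src == staging.data() && src[0] == 0xAB);
        return 0;
    }
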
| @@ -112,7 +130,7 @@ public: | |||
| 112 | } | 130 | } |
| 113 | 131 | ||
| 114 | /// Write any cached resources overlapping the specified region back to memory | 132 | /// Write any cached resources overlapping the specified region back to memory |
| 115 | void FlushRegion(CacheAddr addr, std::size_t size) { | 133 | void FlushRegion(VAddr addr, std::size_t size) { |
| 116 | std::lock_guard lock{mutex}; | 134 | std::lock_guard lock{mutex}; |
| 117 | 135 | ||
| 118 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | 136 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); |
| @@ -127,7 +145,7 @@ public: | |||
| 127 | } | 145 | } |
| 128 | 146 | ||
| 129 | /// Mark the specified region as being invalidated | 147 | /// Mark the specified region as being invalidated |
| 130 | void InvalidateRegion(CacheAddr addr, u64 size) { | 148 | void InvalidateRegion(VAddr addr, u64 size) { |
| 131 | std::lock_guard lock{mutex}; | 149 | std::lock_guard lock{mutex}; |
| 132 | 150 | ||
| 133 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | 151 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); |
| @@ -152,7 +170,7 @@ protected: | |||
| 152 | 170 | ||
| 153 | virtual void WriteBarrier() = 0; | 171 | virtual void WriteBarrier() = 0; |
| 154 | 172 | ||
| 155 | virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; | 173 | virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; |
| 156 | 174 | ||
| 157 | virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, | 175 | virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, |
| 158 | const u8* data) = 0; | 176 | const u8* data) = 0; |
| @@ -169,20 +187,17 @@ protected: | |||
| 169 | 187 | ||
| 170 | /// Registers an object into the cache | 188 | /// Registers an object into the cache |
| 171 | void Register(const MapInterval& new_map, bool inherit_written = false) { | 189 | void Register(const MapInterval& new_map, bool inherit_written = false) { |
| 172 | const CacheAddr cache_ptr = new_map->GetStart(); | 190 | const VAddr cpu_addr = new_map->GetStart(); |
| 173 | const std::optional<VAddr> cpu_addr = | 191 | if (!cpu_addr) { |
| 174 | system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); | ||
| 175 | if (!cache_ptr || !cpu_addr) { | ||
| 176 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", | 192 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", |
| 177 | new_map->GetGpuAddress()); | 193 | new_map->GetGpuAddress()); |
| 178 | return; | 194 | return; |
| 179 | } | 195 | } |
| 180 | const std::size_t size = new_map->GetEnd() - new_map->GetStart(); | 196 | const std::size_t size = new_map->GetEnd() - new_map->GetStart(); |
| 181 | new_map->SetCpuAddress(*cpu_addr); | ||
| 182 | new_map->MarkAsRegistered(true); | 197 | new_map->MarkAsRegistered(true); |
| 183 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; | 198 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; |
| 184 | mapped_addresses.insert({interval, new_map}); | 199 | mapped_addresses.insert({interval, new_map}); |
| 185 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | 200 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 186 | if (inherit_written) { | 201 | if (inherit_written) { |
| 187 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); | 202 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); |
| 188 | new_map->MarkAsWritten(true); | 203 | new_map->MarkAsWritten(true); |
| @@ -192,7 +207,7 @@ protected: | |||
| 192 | /// Unregisters an object from the cache | 207 | /// Unregisters an object from the cache |
| 193 | void Unregister(MapInterval& map) { | 208 | void Unregister(MapInterval& map) { |
| 194 | const std::size_t size = map->GetEnd() - map->GetStart(); | 209 | const std::size_t size = map->GetEnd() - map->GetStart(); |
| 195 | rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); | 210 | rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); |
| 196 | map->MarkAsRegistered(false); | 211 | map->MarkAsRegistered(false); |
| 197 | if (map->IsWritten()) { | 212 | if (map->IsWritten()) { |
| 198 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 213 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
| @@ -202,32 +217,39 @@ protected: | |||
| 202 | } | 217 | } |
| 203 | 218 | ||
| 204 | private: | 219 | private: |
| 205 | MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { | 220 | MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { |
| 206 | return std::make_shared<MapIntervalBase>(start, end, gpu_addr); | 221 | return std::make_shared<MapIntervalBase>(start, end, gpu_addr); |
| 207 | } | 222 | } |
| 208 | 223 | ||
| 209 | MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, | 224 | MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, |
| 210 | const CacheAddr cache_addr, const std::size_t size) { | 225 | const std::size_t size) { |
| 211 | 226 | ||
| 212 | std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); | 227 | std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); |
| 213 | if (overlaps.empty()) { | 228 | if (overlaps.empty()) { |
| 214 | const CacheAddr cache_addr_end = cache_addr + size; | 229 | auto& memory_manager = system.GPU().MemoryManager(); |
| 215 | MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); | 230 | const VAddr cpu_addr_end = cpu_addr + size; |
| 216 | u8* host_ptr = FromCacheAddr(cache_addr); | 231 | MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); |
| 217 | UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); | 232 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 233 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 234 | UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); | ||
| 235 | } else { | ||
| 236 | staging_buffer.resize(size); | ||
| 237 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | ||
| 238 | UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); | ||
| 239 | } | ||
| 218 | Register(new_map); | 240 | Register(new_map); |
| 219 | return new_map; | 241 | return new_map; |
| 220 | } | 242 | } |
| 221 | 243 | ||
| 222 | const CacheAddr cache_addr_end = cache_addr + size; | 244 | const VAddr cpu_addr_end = cpu_addr + size; |
| 223 | if (overlaps.size() == 1) { | 245 | if (overlaps.size() == 1) { |
| 224 | MapInterval& current_map = overlaps[0]; | 246 | MapInterval& current_map = overlaps[0]; |
| 225 | if (current_map->IsInside(cache_addr, cache_addr_end)) { | 247 | if (current_map->IsInside(cpu_addr, cpu_addr_end)) { |
| 226 | return current_map; | 248 | return current_map; |
| 227 | } | 249 | } |
| 228 | } | 250 | } |
| 229 | CacheAddr new_start = cache_addr; | 251 | VAddr new_start = cpu_addr; |
| 230 | CacheAddr new_end = cache_addr_end; | 252 | VAddr new_end = cpu_addr_end; |
| 231 | bool write_inheritance = false; | 253 | bool write_inheritance = false; |
| 232 | bool modified_inheritance = false; | 254 | bool modified_inheritance = false; |
| 233 | // Calculate new buffer parameters | 255 | // Calculate new buffer parameters |
| @@ -237,7 +259,7 @@ private: | |||
| 237 | write_inheritance |= overlap->IsWritten(); | 259 | write_inheritance |= overlap->IsWritten(); |
| 238 | modified_inheritance |= overlap->IsModified(); | 260 | modified_inheritance |= overlap->IsModified(); |
| 239 | } | 261 | } |
| 240 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; | 262 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; |
| 241 | for (auto& overlap : overlaps) { | 263 | for (auto& overlap : overlaps) { |
| 242 | Unregister(overlap); | 264 | Unregister(overlap); |
| 243 | } | 265 | } |
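
When the requested range partially overlaps existing maps, MapAddress above widens the interval to cover every overlap and inherits their written/modified flags before re-registering. The core calculation, isolated with simplified types:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <vector>

    using VAddr = std::uint64_t;

    struct Overlap {
        VAddr start, end;
        bool written, modified;
    };

    struct Merged {
        VAddr start, end;
        bool write_inheritance = false;
        bool modified_inheritance = false;
    };

    // Widen [cpu_addr, cpu_addr_end) until it swallows every overlapping map,
    // inheriting their written/modified flags along the way.
    Merged MergeInterval(VAddr cpu_addr, VAddr cpu_addr_end,
                         const std::vector<Overlap>& overlaps) {
        Merged m{cpu_addr, cpu_addr_end};
        for (const Overlap& o : overlaps) {
            m.start = std::min(m.start, o.start);
            m.end = std::max(m.end, o.end);
            m.write_inheritance |= o.written;
            m.modified_inheritance |= o.modified;
        }
        return m;
    }

    int main() {
        const std::vector<Overlap> overlaps{{0x0800, 0x1200, true, false}};
        const Merged m = MergeInterval(0x1000, 0x2000, overlaps);
        assert(m.start == 0x0800 && m.end == 0x2000 && m.write_inheritance);
        return 0;
    }
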
| @@ -250,7 +272,7 @@ private: | |||
| 250 | return new_map; | 272 | return new_map; |
| 251 | } | 273 | } |
| 252 | 274 | ||
| 253 | void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, | 275 | void UpdateBlock(const TBuffer& block, VAddr start, VAddr end, |
| 254 | std::vector<MapInterval>& overlaps) { | 276 | std::vector<MapInterval>& overlaps) { |
| 255 | const IntervalType base_interval{start, end}; | 277 | const IntervalType base_interval{start, end}; |
| 256 | IntervalSet interval_set{}; | 278 | IntervalSet interval_set{}; |
| @@ -262,13 +284,15 @@ private: | |||
| 262 | for (auto& interval : interval_set) { | 284 | for (auto& interval : interval_set) { |
| 263 | std::size_t size = interval.upper() - interval.lower(); | 285 | std::size_t size = interval.upper() - interval.lower(); |
| 264 | if (size > 0) { | 286 | if (size > 0) { |
| 265 | u8* host_ptr = FromCacheAddr(interval.lower()); | 287 | staging_buffer.resize(size); |
| 266 | UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); | 288 | system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); |
| 289 | UploadBlockData(block, block->GetOffset(interval.lower()), size, | ||
| 290 | staging_buffer.data()); | ||
| 267 | } | 291 | } |
| 268 | } | 292 | } |
| 269 | } | 293 | } |
| 270 | 294 | ||
| 271 | std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { | 295 | std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { |
| 272 | if (size == 0) { | 296 | if (size == 0) { |
| 273 | return {}; | 297 | return {}; |
| 274 | } | 298 | } |
| @@ -290,8 +314,9 @@ private: | |||
| 290 | void FlushMap(MapInterval map) { | 314 | void FlushMap(MapInterval map) { |
| 291 | std::size_t size = map->GetEnd() - map->GetStart(); | 315 | std::size_t size = map->GetEnd() - map->GetStart(); |
| 292 | TBuffer block = blocks[map->GetStart() >> block_page_bits]; | 316 | TBuffer block = blocks[map->GetStart() >> block_page_bits]; |
| 293 | u8* host_ptr = FromCacheAddr(map->GetStart()); | 317 | staging_buffer.resize(size); |
| 294 | DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); | 318 | DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); |
| 319 | system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); | ||
| 295 | map->MarkAsModified(false, 0); | 320 | map->MarkAsModified(false, 0); |
| 296 | } | 321 | } |
| 297 | 322 | ||
| @@ -316,14 +341,14 @@ private: | |||
| 316 | TBuffer EnlargeBlock(TBuffer buffer) { | 341 | TBuffer EnlargeBlock(TBuffer buffer) { |
| 317 | const std::size_t old_size = buffer->GetSize(); | 342 | const std::size_t old_size = buffer->GetSize(); |
| 318 | const std::size_t new_size = old_size + block_page_size; | 343 | const std::size_t new_size = old_size + block_page_size; |
| 319 | const CacheAddr cache_addr = buffer->GetCacheAddr(); | 344 | const VAddr cpu_addr = buffer->GetCpuAddr(); |
| 320 | TBuffer new_buffer = CreateBlock(cache_addr, new_size); | 345 | TBuffer new_buffer = CreateBlock(cpu_addr, new_size); |
| 321 | CopyBlock(buffer, new_buffer, 0, 0, old_size); | 346 | CopyBlock(buffer, new_buffer, 0, 0, old_size); |
| 322 | buffer->SetEpoch(epoch); | 347 | buffer->SetEpoch(epoch); |
| 323 | pending_destruction.push_back(buffer); | 348 | pending_destruction.push_back(buffer); |
| 324 | const CacheAddr cache_addr_end = cache_addr + new_size - 1; | 349 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; |
| 325 | u64 page_start = cache_addr >> block_page_bits; | 350 | u64 page_start = cpu_addr >> block_page_bits; |
| 326 | const u64 page_end = cache_addr_end >> block_page_bits; | 351 | const u64 page_end = cpu_addr_end >> block_page_bits; |
| 327 | while (page_start <= page_end) { | 352 | while (page_start <= page_end) { |
| 328 | blocks[page_start] = new_buffer; | 353 | blocks[page_start] = new_buffer; |
| 329 | ++page_start; | 354 | ++page_start; |
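
EnlargeBlock, MergeBlocks, and GetBlock all walk the same inclusive page range when rewriting the blocks table. The arithmetic in isolation; the block_page_bits value here is an assumption for illustration, not taken from the source:

    #include <cassert>
    #include <cstdint>
    #include <utility>

    using VAddr = std::uint64_t;

    constexpr std::uint64_t block_page_bits = 21; // assumed 2 MiB block pages

    // Inclusive page indices spanned by [cpu_addr, cpu_addr + size), matching
    // the page_start <= page_end loops in EnlargeBlock/MergeBlocks/GetBlock.
    constexpr std::pair<std::uint64_t, std::uint64_t> PageRange(VAddr cpu_addr,
                                                                std::uint64_t size) {
        const VAddr last_byte = cpu_addr + size - 1;
        return {cpu_addr >> block_page_bits, last_byte >> block_page_bits};
    }

    int main() {
        const auto [first, last] = PageRange(0x1FFFFF, 2); // straddles a 2 MiB boundary
        assert(first == 0 && last == 1);
        return 0;
    }
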
| @@ -334,9 +359,9 @@ private: | |||
| 334 | TBuffer MergeBlocks(TBuffer first, TBuffer second) { | 359 | TBuffer MergeBlocks(TBuffer first, TBuffer second) { |
| 335 | const std::size_t size_1 = first->GetSize(); | 360 | const std::size_t size_1 = first->GetSize(); |
| 336 | const std::size_t size_2 = second->GetSize(); | 361 | const std::size_t size_2 = second->GetSize(); |
| 337 | const CacheAddr first_addr = first->GetCacheAddr(); | 362 | const VAddr first_addr = first->GetCpuAddr(); |
| 338 | const CacheAddr second_addr = second->GetCacheAddr(); | 363 | const VAddr second_addr = second->GetCpuAddr(); |
| 339 | const CacheAddr new_addr = std::min(first_addr, second_addr); | 364 | const VAddr new_addr = std::min(first_addr, second_addr); |
| 340 | const std::size_t new_size = size_1 + size_2; | 365 | const std::size_t new_size = size_1 + size_2; |
| 341 | TBuffer new_buffer = CreateBlock(new_addr, new_size); | 366 | TBuffer new_buffer = CreateBlock(new_addr, new_size); |
| 342 | CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); | 367 | CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); |
| @@ -345,9 +370,9 @@ private: | |||
| 345 | second->SetEpoch(epoch); | 370 | second->SetEpoch(epoch); |
| 346 | pending_destruction.push_back(first); | 371 | pending_destruction.push_back(first); |
| 347 | pending_destruction.push_back(second); | 372 | pending_destruction.push_back(second); |
| 348 | const CacheAddr cache_addr_end = new_addr + new_size - 1; | 373 | const VAddr cpu_addr_end = new_addr + new_size - 1; |
| 349 | u64 page_start = new_addr >> block_page_bits; | 374 | u64 page_start = new_addr >> block_page_bits; |
| 350 | const u64 page_end = cache_addr_end >> block_page_bits; | 375 | const u64 page_end = cpu_addr_end >> block_page_bits; |
| 351 | while (page_start <= page_end) { | 376 | while (page_start <= page_end) { |
| 352 | blocks[page_start] = new_buffer; | 377 | blocks[page_start] = new_buffer; |
| 353 | ++page_start; | 378 | ++page_start; |
| @@ -355,18 +380,18 @@ private: | |||
| 355 | return new_buffer; | 380 | return new_buffer; |
| 356 | } | 381 | } |
| 357 | 382 | ||
| 358 | TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { | 383 | TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { |
| 359 | TBuffer found{}; | 384 | TBuffer found{}; |
| 360 | const CacheAddr cache_addr_end = cache_addr + size - 1; | 385 | const VAddr cpu_addr_end = cpu_addr + size - 1; |
| 361 | u64 page_start = cache_addr >> block_page_bits; | 386 | u64 page_start = cpu_addr >> block_page_bits; |
| 362 | const u64 page_end = cache_addr_end >> block_page_bits; | 387 | const u64 page_end = cpu_addr_end >> block_page_bits; |
| 363 | while (page_start <= page_end) { | 388 | while (page_start <= page_end) { |
| 364 | auto it = blocks.find(page_start); | 389 | auto it = blocks.find(page_start); |
| 365 | if (it == blocks.end()) { | 390 | if (it == blocks.end()) { |
| 366 | if (found) { | 391 | if (found) { |
| 367 | found = EnlargeBlock(found); | 392 | found = EnlargeBlock(found); |
| 368 | } else { | 393 | } else { |
| 369 | const CacheAddr start_addr = (page_start << block_page_bits); | 394 | const VAddr start_addr = (page_start << block_page_bits); |
| 370 | found = CreateBlock(start_addr, block_page_size); | 395 | found = CreateBlock(start_addr, block_page_size); |
| 371 | blocks[page_start] = found; | 396 | blocks[page_start] = found; |
| 372 | } | 397 | } |
| @@ -386,7 +411,7 @@ private: | |||
| 386 | return found; | 411 | return found; |
| 387 | } | 412 | } |
| 388 | 413 | ||
| 389 | void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { | 414 | void MarkRegionAsWritten(const VAddr start, const VAddr end) { |
| 390 | u64 page_start = start >> write_page_bit; | 415 | u64 page_start = start >> write_page_bit; |
| 391 | const u64 page_end = end >> write_page_bit; | 416 | const u64 page_end = end >> write_page_bit; |
| 392 | while (page_start <= page_end) { | 417 | while (page_start <= page_end) { |
| @@ -400,7 +425,7 @@ private: | |||
| 400 | } | 425 | } |
| 401 | } | 426 | } |
| 402 | 427 | ||
| 403 | void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { | 428 | void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { |
| 404 | u64 page_start = start >> write_page_bit; | 429 | u64 page_start = start >> write_page_bit; |
| 405 | const u64 page_end = end >> write_page_bit; | 430 | const u64 page_end = end >> write_page_bit; |
| 406 | while (page_start <= page_end) { | 431 | while (page_start <= page_end) { |
| @@ -416,7 +441,7 @@ private: | |||
| 416 | } | 441 | } |
| 417 | } | 442 | } |
| 418 | 443 | ||
| 419 | bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { | 444 | bool IsRegionWritten(const VAddr start, const VAddr end) const { |
| 420 | u64 page_start = start >> write_page_bit; | 445 | u64 page_start = start >> write_page_bit; |
| 421 | const u64 page_end = end >> write_page_bit; | 446 | const u64 page_end = end >> write_page_bit; |
| 422 | while (page_start <= page_end) { | 447 | while (page_start <= page_end) { |
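
MarkRegionAsWritten, UnmarkRegionAsWritten, and IsRegionWritten track CPU writes at page granularity over the new VAddr space. A runnable sketch of that reference-counted bookkeeping; the page shift and container are assumptions, not the cache's actual members:

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>

    using VAddr = std::uint64_t;

    constexpr std::uint64_t write_page_bit = 11; // assumed page shift

    // Reference-counted written-page set, mirroring the shape of the
    // mark/unmark/query loops above.
    class WrittenPages {
    public:
        void Mark(VAddr start, VAddr end) { // both bounds inclusive, as in the diff
            for (auto page = start >> write_page_bit; page <= (end >> write_page_bit); ++page) {
                ++counts[page];
            }
        }

        void Unmark(VAddr start, VAddr end) {
            for (auto page = start >> write_page_bit; page <= (end >> write_page_bit); ++page) {
                if (--counts[page] <= 0) {
                    counts.erase(page);
                }
            }
        }

        bool IsWritten(VAddr start, VAddr end) const {
            for (auto page = start >> write_page_bit; page <= (end >> write_page_bit); ++page) {
                if (counts.count(page) != 0) {
                    return true;
                }
            }
            return false;
        }

    private:
        std::unordered_map<std::uint64_t, int> counts;
    };

    int main() {
        WrittenPages pages;
        pages.Mark(0x1000, 0x1FFF);
        assert(pages.IsWritten(0x1800, 0x1801));
        pages.Unmark(0x1000, 0x1FFF);
        assert(!pages.IsWritten(0x1800, 0x1801));
        return 0;
    }
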
| @@ -440,8 +465,8 @@ private: | |||
| 440 | u64 buffer_offset = 0; | 465 | u64 buffer_offset = 0; |
| 441 | u64 buffer_offset_base = 0; | 466 | u64 buffer_offset_base = 0; |
| 442 | 467 | ||
| 443 | using IntervalSet = boost::icl::interval_set<CacheAddr>; | 468 | using IntervalSet = boost::icl::interval_set<VAddr>; |
| 444 | using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; | 469 | using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; |
| 445 | using IntervalType = typename IntervalCache::interval_type; | 470 | using IntervalType = typename IntervalCache::interval_type; |
| 446 | IntervalCache mapped_addresses; | 471 | IntervalCache mapped_addresses; |
| 447 | 472 | ||
| @@ -456,6 +481,8 @@ private: | |||
| 456 | u64 epoch = 0; | 481 | u64 epoch = 0; |
| 457 | u64 modified_ticks = 0; | 482 | u64 modified_ticks = 0; |
| 458 | 483 | ||
| 484 | std::vector<u8> staging_buffer; | ||
| 485 | |||
| 459 | std::recursive_mutex mutex; | 486 | std::recursive_mutex mutex; |
| 460 | }; | 487 | }; |
| 461 | 488 | ||
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 3a104d5cd..b0956029d 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h | |||
| @@ -11,7 +11,7 @@ namespace VideoCommon { | |||
| 11 | 11 | ||
| 12 | class MapIntervalBase { | 12 | class MapIntervalBase { |
| 13 | public: | 13 | public: |
| 14 | MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) | 14 | MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) |
| 15 | : start{start}, end{end}, gpu_addr{gpu_addr} {} | 15 | : start{start}, end{end}, gpu_addr{gpu_addr} {} |
| 16 | 16 | ||
| 17 | void SetCpuAddress(VAddr new_cpu_addr) { | 17 | void SetCpuAddress(VAddr new_cpu_addr) { |
| @@ -26,7 +26,7 @@ public: | |||
| 26 | return gpu_addr; | 26 | return gpu_addr; |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { | 29 | bool IsInside(const VAddr other_start, const VAddr other_end) const { |
| 30 | return (start <= other_start && other_end <= end); | 30 | return (start <= other_start && other_end <= end); |
| 31 | } | 31 | } |
| 32 | 32 | ||
| @@ -46,11 +46,11 @@ public: | |||
| 46 | return is_registered; | 46 | return is_registered; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | CacheAddr GetStart() const { | 49 | VAddr GetStart() const { |
| 50 | return start; | 50 | return start; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | CacheAddr GetEnd() const { | 53 | VAddr GetEnd() const { |
| 54 | return end; | 54 | return end; |
| 55 | } | 55 | } |
| 56 | 56 | ||
| @@ -76,8 +76,8 @@ public: | |||
| 76 | } | 76 | } |
| 77 | 77 | ||
| 78 | private: | 78 | private: |
| 79 | CacheAddr start; | 79 | VAddr start; |
| 80 | CacheAddr end; | 80 | VAddr end; |
| 81 | GPUVAddr gpu_addr; | 81 | GPUVAddr gpu_addr; |
| 82 | VAddr cpu_addr{}; | 82 | VAddr cpu_addr{}; |
| 83 | bool is_written{}; | 83 | bool is_written{}; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d24c9f657..5cf6a4cc3 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -303,6 +303,10 @@ public: | |||
| 303 | return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); | 303 | return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); |
| 304 | } | 304 | } |
| 305 | 305 | ||
| 306 | bool IsConstant() const { | ||
| 307 | return constant; | ||
| 308 | } | ||
| 309 | |||
| 306 | bool IsValid() const { | 310 | bool IsValid() const { |
| 307 | return size != Size::Invalid; | 311 | return size != Size::Invalid; |
| 308 | } | 312 | } |
| @@ -312,6 +316,35 @@ public: | |||
| 312 | } | 316 | } |
| 313 | }; | 317 | }; |
| 314 | 318 | ||
| 319 | struct MsaaSampleLocation { | ||
| 320 | union { | ||
| 321 | BitField<0, 4, u32> x0; | ||
| 322 | BitField<4, 4, u32> y0; | ||
| 323 | BitField<8, 4, u32> x1; | ||
| 324 | BitField<12, 4, u32> y1; | ||
| 325 | BitField<16, 4, u32> x2; | ||
| 326 | BitField<20, 4, u32> y2; | ||
| 327 | BitField<24, 4, u32> x3; | ||
| 328 | BitField<28, 4, u32> y3; | ||
| 329 | }; | ||
| 330 | |||
| 331 | constexpr std::pair<u32, u32> Location(int index) const { | ||
| 332 | switch (index) { | ||
| 333 | case 0: | ||
| 334 | return {x0, y0}; | ||
| 335 | case 1: | ||
| 336 | return {x1, y1}; | ||
| 337 | case 2: | ||
| 338 | return {x2, y2}; | ||
| 339 | case 3: | ||
| 340 | return {x3, y3}; | ||
| 341 | default: | ||
| 342 | UNREACHABLE(); | ||
| 343 | return {0, 0}; | ||
| 344 | } | ||
| 345 | } | ||
| 346 | }; | ||
| 347 | |||
| 315 | enum class DepthMode : u32 { | 348 | enum class DepthMode : u32 { |
| 316 | MinusOneToOne = 0, | 349 | MinusOneToOne = 0, |
| 317 | ZeroToOne = 1, | 350 | ZeroToOne = 1, |
| @@ -793,7 +826,13 @@ public: | |||
| 793 | 826 | ||
| 794 | u32 rt_separate_frag_data; | 827 | u32 rt_separate_frag_data; |
| 795 | 828 | ||
| 796 | INSERT_UNION_PADDING_WORDS(0xC); | 829 | INSERT_UNION_PADDING_WORDS(0x1); |
| 830 | |||
| 831 | u32 multisample_raster_enable; | ||
| 832 | u32 multisample_raster_samples; | ||
| 833 | std::array<u32, 4> multisample_sample_mask; | ||
| 834 | |||
| 835 | INSERT_UNION_PADDING_WORDS(0x5); | ||
| 797 | 836 | ||
| 798 | struct { | 837 | struct { |
| 799 | u32 address_high; | 838 | u32 address_high; |
| @@ -830,7 +869,16 @@ public: | |||
| 830 | 869 | ||
| 831 | std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; | 870 | std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; |
| 832 | 871 | ||
| 833 | INSERT_UNION_PADDING_WORDS(0xF); | 872 | std::array<MsaaSampleLocation, 4> multisample_sample_locations; |
| 873 | |||
| 874 | INSERT_UNION_PADDING_WORDS(0x2); | ||
| 875 | |||
| 876 | union { | ||
| 877 | BitField<0, 1, u32> enable; | ||
| 878 | BitField<4, 3, u32> target; | ||
| 879 | } multisample_coverage_to_color; | ||
| 880 | |||
| 881 | INSERT_UNION_PADDING_WORDS(0x8); | ||
| 834 | 882 | ||
| 835 | struct { | 883 | struct { |
| 836 | union { | 884 | union { |
| @@ -922,7 +970,10 @@ public: | |||
| 922 | BitField<4, 1, u32> triangle_rast_flip; | 970 | BitField<4, 1, u32> triangle_rast_flip; |
| 923 | } screen_y_control; | 971 | } screen_y_control; |
| 924 | 972 | ||
| 925 | INSERT_UNION_PADDING_WORDS(0x21); | 973 | float line_width_smooth; |
| 974 | float line_width_aliased; | ||
| 975 | |||
| 976 | INSERT_UNION_PADDING_WORDS(0x1F); | ||
| 926 | 977 | ||
| 927 | u32 vb_element_base; | 978 | u32 vb_element_base; |
| 928 | u32 vb_base_instance; | 979 | u32 vb_base_instance; |
| @@ -943,7 +994,7 @@ public: | |||
| 943 | 994 | ||
| 944 | CounterReset counter_reset; | 995 | CounterReset counter_reset; |
| 945 | 996 | ||
| 946 | INSERT_UNION_PADDING_WORDS(0x1); | 997 | u32 multisample_enable; |
| 947 | 998 | ||
| 948 | u32 zeta_enable; | 999 | u32 zeta_enable; |
| 949 | 1000 | ||
| @@ -980,7 +1031,7 @@ public: | |||
| 980 | 1031 | ||
| 981 | float polygon_offset_factor; | 1032 | float polygon_offset_factor; |
| 982 | 1033 | ||
| 983 | INSERT_UNION_PADDING_WORDS(0x1); | 1034 | u32 line_smooth_enable; |
| 984 | 1035 | ||
| 985 | struct { | 1036 | struct { |
| 986 | u32 tic_address_high; | 1037 | u32 tic_address_high; |
| @@ -1007,7 +1058,11 @@ public: | |||
| 1007 | 1058 | ||
| 1008 | float polygon_offset_units; | 1059 | float polygon_offset_units; |
| 1009 | 1060 | ||
| 1010 | INSERT_UNION_PADDING_WORDS(0x11); | 1061 | INSERT_UNION_PADDING_WORDS(0x4); |
| 1062 | |||
| 1063 | Tegra::Texture::MsaaMode multisample_mode; | ||
| 1064 | |||
| 1065 | INSERT_UNION_PADDING_WORDS(0xC); | ||
| 1011 | 1066 | ||
| 1012 | union { | 1067 | union { |
| 1013 | BitField<2, 1, u32> coord_origin; | 1068 | BitField<2, 1, u32> coord_origin; |
| @@ -1507,12 +1562,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); | |||
| 1507 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); | 1562 | ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); |
| 1508 | ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); | 1563 | ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); |
| 1509 | ASSERT_REG_POSITION(color_mask_common, 0x3E4); | 1564 | ASSERT_REG_POSITION(color_mask_common, 0x3E4); |
| 1510 | ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); | ||
| 1511 | ASSERT_REG_POSITION(depth_bounds, 0x3E7); | 1565 | ASSERT_REG_POSITION(depth_bounds, 0x3E7); |
| 1566 | ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); | ||
| 1567 | ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); | ||
| 1568 | ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); | ||
| 1569 | ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); | ||
| 1512 | ASSERT_REG_POSITION(zeta, 0x3F8); | 1570 | ASSERT_REG_POSITION(zeta, 0x3F8); |
| 1513 | ASSERT_REG_POSITION(clear_flags, 0x43E); | 1571 | ASSERT_REG_POSITION(clear_flags, 0x43E); |
| 1514 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); | 1572 | ASSERT_REG_POSITION(fill_rectangle, 0x44F); |
| 1515 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); | 1573 | ASSERT_REG_POSITION(vertex_attrib_format, 0x458); |
| 1574 | ASSERT_REG_POSITION(multisample_sample_locations, 0x478); | ||
| 1575 | ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); | ||
| 1516 | ASSERT_REG_POSITION(rt_control, 0x487); | 1576 | ASSERT_REG_POSITION(rt_control, 0x487); |
| 1517 | ASSERT_REG_POSITION(zeta_width, 0x48a); | 1577 | ASSERT_REG_POSITION(zeta_width, 0x48a); |
| 1518 | ASSERT_REG_POSITION(zeta_height, 0x48b); | 1578 | ASSERT_REG_POSITION(zeta_height, 0x48b); |
| @@ -1538,6 +1598,8 @@ ASSERT_REG_POSITION(stencil_front_func_mask, 0x4E6); | |||
| 1538 | ASSERT_REG_POSITION(stencil_front_mask, 0x4E7); | 1598 | ASSERT_REG_POSITION(stencil_front_mask, 0x4E7); |
| 1539 | ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); | 1599 | ASSERT_REG_POSITION(frag_color_clamp, 0x4EA); |
| 1540 | ASSERT_REG_POSITION(screen_y_control, 0x4EB); | 1600 | ASSERT_REG_POSITION(screen_y_control, 0x4EB); |
| 1601 | ASSERT_REG_POSITION(line_width_smooth, 0x4EC); | ||
| 1602 | ASSERT_REG_POSITION(line_width_aliased, 0x4ED); | ||
| 1541 | ASSERT_REG_POSITION(vb_element_base, 0x50D); | 1603 | ASSERT_REG_POSITION(vb_element_base, 0x50D); |
| 1542 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); | 1604 | ASSERT_REG_POSITION(vb_base_instance, 0x50E); |
| 1543 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); | 1605 | ASSERT_REG_POSITION(clip_distance_enabled, 0x544); |
| @@ -1545,11 +1607,13 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545); | |||
| 1545 | ASSERT_REG_POSITION(point_size, 0x546); | 1607 | ASSERT_REG_POSITION(point_size, 0x546); |
| 1546 | ASSERT_REG_POSITION(point_sprite_enable, 0x548); | 1608 | ASSERT_REG_POSITION(point_sprite_enable, 0x548); |
| 1547 | ASSERT_REG_POSITION(counter_reset, 0x54C); | 1609 | ASSERT_REG_POSITION(counter_reset, 0x54C); |
| 1610 | ASSERT_REG_POSITION(multisample_enable, 0x54D); | ||
| 1548 | ASSERT_REG_POSITION(zeta_enable, 0x54E); | 1611 | ASSERT_REG_POSITION(zeta_enable, 0x54E); |
| 1549 | ASSERT_REG_POSITION(multisample_control, 0x54F); | 1612 | ASSERT_REG_POSITION(multisample_control, 0x54F); |
| 1550 | ASSERT_REG_POSITION(condition, 0x554); | 1613 | ASSERT_REG_POSITION(condition, 0x554); |
| 1551 | ASSERT_REG_POSITION(tsc, 0x557); | 1614 | ASSERT_REG_POSITION(tsc, 0x557); |
| 1552 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); | 1615 | ASSERT_REG_POSITION(polygon_offset_factor, 0x55B); |
| 1616 | ASSERT_REG_POSITION(line_smooth_enable, 0x55C); | ||
| 1553 | ASSERT_REG_POSITION(tic, 0x55D); | 1617 | ASSERT_REG_POSITION(tic, 0x55D); |
| 1554 | ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); | 1618 | ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); |
| 1555 | ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); | 1619 | ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); |
| @@ -1558,6 +1622,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); | |||
| 1558 | ASSERT_REG_POSITION(stencil_back_func_func, 0x569); | 1622 | ASSERT_REG_POSITION(stencil_back_func_func, 0x569); |
| 1559 | ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); | 1623 | ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); |
| 1560 | ASSERT_REG_POSITION(polygon_offset_units, 0x56F); | 1624 | ASSERT_REG_POSITION(polygon_offset_units, 0x56F); |
| 1625 | ASSERT_REG_POSITION(multisample_mode, 0x574); | ||
| 1561 | ASSERT_REG_POSITION(point_coord_replace, 0x581); | 1626 | ASSERT_REG_POSITION(point_coord_replace, 0x581); |
| 1562 | ASSERT_REG_POSITION(code_address, 0x582); | 1627 | ASSERT_REG_POSITION(code_address, 0x582); |
| 1563 | ASSERT_REG_POSITION(draw, 0x585); | 1628 | ASSERT_REG_POSITION(draw, 0x585); |
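
Of the newly mapped registers, MsaaSampleLocation packs four (x, y) sample positions into one 32-bit word at four bits per coordinate, and Location(index) unpacks one pair. The same decode written out by hand against a made-up register value:

    #include <cassert>
    #include <cstdint>

    // Decode sample position `index` (0..3) from one packed location word:
    // 4 bits of x followed by 4 bits of y per sample, lowest sample first.
    void Location(std::uint32_t word, int index, std::uint32_t& x, std::uint32_t& y) {
        const std::uint32_t shift = static_cast<std::uint32_t>(index) * 8;
        x = (word >> shift) & 0xF;
        y = (word >> (shift + 4)) & 0xF;
    }

    int main() {
        // Made-up word packing the (x, y) pairs (1,2), (3,4), (5,6), (7,8).
        const std::uint32_t word = 0x87654321;
        std::uint32_t x = 0, y = 0;
        Location(word, 0, x, y);
        assert(x == 1 && y == 2);
        Location(word, 3, x, y);
        assert(x == 7 && y == 8);
        return 0;
    }
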
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index a31947ef3..5e9cfba22 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -290,6 +290,23 @@ enum class VmadShr : u64 { | |||
| 290 | Shr15 = 2, | 290 | Shr15 = 2, |
| 291 | }; | 291 | }; |
| 292 | 292 | ||
| 293 | enum class VmnmxType : u64 { | ||
| 294 | Bits8, | ||
| 295 | Bits16, | ||
| 296 | Bits32, | ||
| 297 | }; | ||
| 298 | |||
| 299 | enum class VmnmxOperation : u64 { | ||
| 300 | Mrg_16H = 0, | ||
| 301 | Mrg_16L = 1, | ||
| 302 | Mrg_8B0 = 2, | ||
| 303 | Mrg_8B2 = 3, | ||
| 304 | Acc = 4, | ||
| 305 | Min = 5, | ||
| 306 | Max = 6, | ||
| 307 | Nop = 7, | ||
| 308 | }; | ||
| 309 | |||
| 293 | enum class XmadMode : u64 { | 310 | enum class XmadMode : u64 { |
| 294 | None = 0, | 311 | None = 0, |
| 295 | CLo = 1, | 312 | CLo = 1, |
| @@ -1657,6 +1674,42 @@ union Instruction { | |||
| 1657 | } vmad; | 1674 | } vmad; |
| 1658 | 1675 | ||
| 1659 | union { | 1676 | union { |
| 1677 | BitField<54, 1, u64> is_dest_signed; | ||
| 1678 | BitField<48, 1, u64> is_src_a_signed; | ||
| 1679 | BitField<49, 1, u64> is_src_b_signed; | ||
| 1680 | BitField<37, 2, u64> src_format_a; | ||
| 1681 | BitField<29, 2, u64> src_format_b; | ||
| 1682 | BitField<56, 1, u64> mx; | ||
| 1683 | BitField<55, 1, u64> sat; | ||
| 1684 | BitField<36, 2, u64> selector_a; | ||
| 1685 | BitField<28, 2, u64> selector_b; | ||
| 1686 | BitField<50, 1, u64> is_op_b_register; | ||
| 1687 | BitField<51, 3, VmnmxOperation> operation; | ||
| 1688 | |||
| 1689 | VmnmxType SourceFormatA() const { | ||
| 1690 | switch (src_format_a) { | ||
| 1691 | case 0b11: | ||
| 1692 | return VmnmxType::Bits32; | ||
| 1693 | case 0b10: | ||
| 1694 | return VmnmxType::Bits16; | ||
| 1695 | default: | ||
| 1696 | return VmnmxType::Bits8; | ||
| 1697 | } | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | VmnmxType SourceFormatB() const { | ||
| 1701 | switch (src_format_b) { | ||
| 1702 | case 0b11: | ||
| 1703 | return VmnmxType::Bits32; | ||
| 1704 | case 0b10: | ||
| 1705 | return VmnmxType::Bits16; | ||
| 1706 | default: | ||
| 1707 | return VmnmxType::Bits8; | ||
| 1708 | } | ||
| 1709 | } | ||
| 1710 | } vmnmx; | ||
| 1711 | |||
| 1712 | union { | ||
| 1660 | BitField<20, 16, u64> imm20_16; | 1713 | BitField<20, 16, u64> imm20_16; |
| 1661 | BitField<35, 1, u64> high_b_rr; // used on RR | 1714 | BitField<35, 1, u64> high_b_rr; // used on RR |
| 1662 | BitField<36, 1, u64> product_shift_left; | 1715 | BitField<36, 1, u64> product_shift_left; |
| @@ -1718,6 +1771,7 @@ public: | |||
| 1718 | BRK, | 1771 | BRK, |
| 1719 | DEPBAR, | 1772 | DEPBAR, |
| 1720 | VOTE, | 1773 | VOTE, |
| 1774 | VOTE_VTG, | ||
| 1721 | SHFL, | 1775 | SHFL, |
| 1722 | FSWZADD, | 1776 | FSWZADD, |
| 1723 | BFE_C, | 1777 | BFE_C, |
| @@ -1765,9 +1819,11 @@ public: | |||
| 1765 | IPA, | 1819 | IPA, |
| 1766 | OUT_R, // Emit vertex/primitive | 1820 | OUT_R, // Emit vertex/primitive |
| 1767 | ISBERD, | 1821 | ISBERD, |
| 1822 | BAR, | ||
| 1768 | MEMBAR, | 1823 | MEMBAR, |
| 1769 | VMAD, | 1824 | VMAD, |
| 1770 | VSETP, | 1825 | VSETP, |
| 1826 | VMNMX, | ||
| 1771 | FFMA_IMM, // Fused Multiply and Add | 1827 | FFMA_IMM, // Fused Multiply and Add |
| 1772 | FFMA_CR, | 1828 | FFMA_CR, |
| 1773 | FFMA_RC, | 1829 | FFMA_RC, |
| @@ -1822,7 +1878,8 @@ public: | |||
| 1822 | ICMP_R, | 1878 | ICMP_R, |
| 1823 | ICMP_CR, | 1879 | ICMP_CR, |
| 1824 | ICMP_IMM, | 1880 | ICMP_IMM, |
| 1825 | FCMP_R, | 1881 | FCMP_RR, |
| 1882 | FCMP_RC, | ||
| 1826 | MUFU, // Multi-Function Operator | 1883 | MUFU, // Multi-Function Operator |
| 1827 | RRO_C, // Range Reduction Operator | 1884 | RRO_C, // Range Reduction Operator |
| 1828 | RRO_R, | 1885 | RRO_R, |
| @@ -1849,7 +1906,7 @@ public: | |||
| 1849 | MOV_C, | 1906 | MOV_C, |
| 1850 | MOV_R, | 1907 | MOV_R, |
| 1851 | MOV_IMM, | 1908 | MOV_IMM, |
| 1852 | MOV_SYS, | 1909 | S2R, |
| 1853 | MOV32_IMM, | 1910 | MOV32_IMM, |
| 1854 | SHL_C, | 1911 | SHL_C, |
| 1855 | SHL_R, | 1912 | SHL_R, |
| @@ -2033,6 +2090,7 @@ private: | |||
| 2033 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), | 2090 | INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), |
| 2034 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), | 2091 | INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), |
| 2035 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), | 2092 | INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), |
| 2093 | INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), | ||
| 2036 | INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), | 2094 | INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), |
| 2037 | INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), | 2095 | INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), |
| 2038 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), | 2096 | INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), |
| @@ -2071,9 +2129,11 @@ private: | |||
| 2071 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 2129 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 2072 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 2130 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
| 2073 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), | 2131 | INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), |
| 2132 | INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), | ||
| 2074 | INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), | 2133 | INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), |
| 2075 | INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), | 2134 | INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), |
| 2076 | INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), | 2135 | INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), |
| 2136 | INST("0011101---------", Id::VMNMX, Type::Video, "VMNMX"), | ||
| 2077 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), | 2137 | INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), |
| 2078 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), | 2138 | INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), |
| 2079 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), | 2139 | INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), |
| @@ -2128,7 +2188,8 @@ private: | |||
| 2128 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), | 2188 | INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), |
| 2129 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), | 2189 | INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), |
| 2130 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), | 2190 | INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), |
| 2131 | INST("010110111010----", Id::FCMP_R, Type::Arithmetic, "FCMP_R"), | 2191 | INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), |
| 2192 | INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), | ||
| 2132 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), | 2193 | INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), |
| 2133 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), | 2194 | INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), |
| 2134 | INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), | 2195 | INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), |
| @@ -2142,7 +2203,7 @@ private: | |||
| 2142 | INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), | 2203 | INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), |
| 2143 | INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), | 2204 | INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), |
| 2144 | INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), | 2205 | INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), |
| 2145 | INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"), | 2206 | INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), |
| 2146 | INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), | 2207 | INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), |
| 2147 | INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), | 2208 | INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), |
| 2148 | INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), | 2209 | INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), |
| @@ -2174,7 +2235,7 @@ private: | |||
| 2174 | INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), | 2235 | INST("0011011-11111---", Id::SHF_LEFT_IMM, Type::Shift, "SHF_LEFT_IMM"), |
| 2175 | INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), | 2236 | INST("0100110011100---", Id::I2I_C, Type::Conversion, "I2I_C"), |
| 2176 | INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), | 2237 | INST("0101110011100---", Id::I2I_R, Type::Conversion, "I2I_R"), |
| 2177 | INST("0011101-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), | 2238 | INST("0011100-11100---", Id::I2I_IMM, Type::Conversion, "I2I_IMM"), |
| 2178 | INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), | 2239 | INST("0100110010111---", Id::I2F_C, Type::Conversion, "I2F_C"), |
| 2179 | INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), | 2240 | INST("0101110010111---", Id::I2F_R, Type::Conversion, "I2F_R"), |
| 2180 | INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), | 2241 | INST("0011100-10111---", Id::I2F_IMM, Type::Conversion, "I2F_IMM"), |
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index bc80661d8..72e2a33d5 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h | |||
| @@ -4,6 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | |||
| 7 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 8 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 9 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -16,7 +19,7 @@ enum class OutputTopology : u32 { | |||
| 16 | TriangleStrip = 7, | 19 | TriangleStrip = 7, |
| 17 | }; | 20 | }; |
| 18 | 21 | ||
| 19 | enum class AttributeUse : u8 { | 22 | enum class PixelImap : u8 { |
| 20 | Unused = 0, | 23 | Unused = 0, |
| 21 | Constant = 1, | 24 | Constant = 1, |
| 22 | Perspective = 2, | 25 | Perspective = 2, |
| @@ -24,7 +27,7 @@ enum class AttributeUse : u8 { | |||
| 24 | }; | 27 | }; |
| 25 | 28 | ||
| 26 | // Documentation in: | 29 | // Documentation in: |
| 27 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture | 30 | // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html |
| 28 | struct Header { | 31 | struct Header { |
| 29 | union { | 32 | union { |
| 30 | BitField<0, 5, u32> sph_type; | 33 | BitField<0, 5, u32> sph_type; |
| @@ -59,8 +62,8 @@ struct Header { | |||
| 59 | union { | 62 | union { |
| 60 | BitField<0, 12, u32> max_output_vertices; | 63 | BitField<0, 12, u32> max_output_vertices; |
| 61 | BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. | 64 | BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. |
| 62 | BitField<24, 4, u32> reserved; | 65 | BitField<20, 4, u32> reserved; |
| 63 | BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. | 66 | BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. |
| 64 | } common4{}; | 67 | } common4{}; |
| 65 | 68 | ||
| 66 | union { | 69 | union { |
| @@ -93,17 +96,20 @@ struct Header { | |||
| 93 | struct { | 96 | struct { |
| 94 | INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA | 97 | INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA |
| 95 | INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB | 98 | INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB |
| 99 | |||
| 96 | union { | 100 | union { |
| 97 | BitField<0, 2, AttributeUse> x; | 101 | BitField<0, 2, PixelImap> x; |
| 98 | BitField<2, 2, AttributeUse> y; | 102 | BitField<2, 2, PixelImap> y; |
| 99 | BitField<4, 2, AttributeUse> w; | 103 | BitField<4, 2, PixelImap> z; |
| 100 | BitField<6, 2, AttributeUse> z; | 104 | BitField<6, 2, PixelImap> w; |
| 101 | u8 raw; | 105 | u8 raw; |
| 102 | } imap_generic_vector[32]; | 106 | } imap_generic_vector[32]; |
| 107 | |||
| 103 | INSERT_UNION_PADDING_BYTES(2); // ImapColor | 108 | INSERT_UNION_PADDING_BYTES(2); // ImapColor |
| 104 | INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC | 109 | INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC |
| 105 | INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10] | 110 | INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10] |
| 106 | INSERT_UNION_PADDING_BYTES(2); // ImapReserved | 111 | INSERT_UNION_PADDING_BYTES(2); // ImapReserved |
| 112 | |||
| 107 | struct { | 113 | struct { |
| 108 | u32 target; | 114 | u32 target; |
| 109 | union { | 115 | union { |
| @@ -112,31 +118,30 @@ struct Header { | |||
| 112 | BitField<2, 30, u32> reserved; | 118 | BitField<2, 30, u32> reserved; |
| 113 | }; | 119 | }; |
| 114 | } omap; | 120 | } omap; |
| 121 | |||
| 115 | bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { | 122 | bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { |
| 116 | const u32 bit = render_target * 4 + component; | 123 | const u32 bit = render_target * 4 + component; |
| 117 | return omap.target & (1 << bit); | 124 | return omap.target & (1 << bit); |
| 118 | } | 125 | } |
| 119 | AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const { | 126 | |
| 120 | return static_cast<AttributeUse>( | 127 | PixelImap GetPixelImap(u32 attribute) const { |
| 121 | (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03); | 128 | const auto get_index = [this, attribute](u32 index) { |
| 122 | } | 129 | return static_cast<PixelImap>( |
| 123 | AttributeUse GetAttributeUse(u32 attribute) const { | 130 | (imap_generic_vector[attribute].raw >> (index * 2)) & 3); |
| 124 | AttributeUse result = AttributeUse::Unused; | 131 | }; |
| 125 | for (u32 i = 0; i < 4; i++) { | 132 | |
| 126 | const auto index = GetAttributeIndexUse(attribute, i); | 133 | std::optional<PixelImap> result; |
| 127 | if (index == AttributeUse::Unused) { | 134 | for (u32 component = 0; component < 4; ++component) { |
| 128 | continue; | 135 | const PixelImap index = get_index(component); |
| 129 | } | 136 | if (index == PixelImap::Unused) { |
| 130 | if (result == AttributeUse::Unused || result == index) { | ||
| 131 | result = index; | ||
| 132 | continue; | 137 | continue; |
| 133 | } | 138 | } |
| 134 | LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode"); | 139 | if (result && result != index) { |
| 135 | if (index == AttributeUse::Perspective) { | 140 | LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); |
| 136 | result = index; | ||
| 137 | } | 141 | } |
| 142 | result = index; | ||
| 138 | } | 143 | } |
| 139 | return result; | 144 | return result.value_or(PixelImap::Unused); |
| 140 | } | 145 | } |
| 141 | } ps; | 146 | } ps; |
| 142 | 147 | ||
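
GetPixelImap above condenses four per-component interpolation modes into a single PixelImap, logging when two used components disagree. A worked decode of the packed byte it walks, two bits per component with x in the low bits:

    #include <cassert>
    #include <cstdint>
    #include <optional>

    enum class PixelImap : std::uint8_t {
        Unused = 0,
        Constant = 1,
        Perspective = 2,
        ScreenLinear = 3,
    };

    // Condense four 2-bit component modes (x in bits 0-1 ... w in bits 6-7)
    // into one; later used components win, as in GetPixelImap above.
    PixelImap Resolve(std::uint8_t raw) {
        std::optional<PixelImap> result;
        for (int component = 0; component < 4; ++component) {
            const auto mode = static_cast<PixelImap>((raw >> (component * 2)) & 3);
            if (mode == PixelImap::Unused) {
                continue;
            }
            // A real conflict (two different used modes) would be logged here.
            result = mode;
        }
        return result.value_or(PixelImap::Unused);
    }

    int main() {
        assert(Resolve(0b00000000) == PixelImap::Unused);
        assert(Resolve(0b00001000) == PixelImap::Perspective); // only y is used
        return 0;
    }
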
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ced9d7e28..1a2d747be 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -270,13 +270,13 @@ public: | |||
| 270 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; | 270 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; |
| 271 | 271 | ||
| 272 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 272 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 273 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; | 273 | virtual void FlushRegion(VAddr addr, u64 size) = 0; |
| 274 | 274 | ||
| 275 | /// Notify rasterizer that any caches of the specified region should be invalidated | 275 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 276 | virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; | 276 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |
| 277 | 277 | ||
| 278 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 278 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 279 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 279 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
| 280 | 280 | ||
| 281 | protected: | 281 | protected: |
| 282 | virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; | 282 | virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 925be8d7b..20e73a37e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -12,8 +12,9 @@ namespace VideoCommon { | |||
| 12 | 12 | ||
| 13 | GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, | 13 | GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, |
| 14 | std::unique_ptr<Core::Frontend::GraphicsContext>&& context) | 14 | std::unique_ptr<Core::Frontend::GraphicsContext>&& context) |
| 15 | : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)), | 15 | : GPU(system, std::move(renderer_), true), gpu_thread{system}, |
| 16 | cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {} | 16 | cpu_context(renderer->GetRenderWindow().CreateSharedContext()), |
| 17 | gpu_context(std::move(context)) {} | ||
| 17 | 18 | ||
| 18 | GPUAsynch::~GPUAsynch() = default; | 19 | GPUAsynch::~GPUAsynch() = default; |
| 19 | 20 | ||
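
The initializer list for GPUAsynch was reordered so cpu_context precedes gpu_context, matching declaration order; GCC and Clang's -Wreorder flags mismatched lists because members are always constructed in declaration order regardless of how the list is written. A minimal illustration:

    struct Widget {
        int first;
        int second;

        // Would warn under -Wreorder and read garbage: 'first' is constructed
        // before 'second' no matter what the list says.
        // Widget() : second{1}, first{second + 1} {}

        Widget() : first{1}, second{first + 1} {} // matches declaration order
    };

    int main() {
        return Widget{}.second == 2 ? 0 : 1;
    }
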
| @@ -30,15 +31,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 30 | gpu_thread.SwapBuffers(framebuffer); | 31 | gpu_thread.SwapBuffers(framebuffer); |
| 31 | } | 32 | } |
| 32 | 33 | ||
| 33 | void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { | 34 | void GPUAsynch::FlushRegion(VAddr addr, u64 size) { |
| 34 | gpu_thread.FlushRegion(addr, size); | 35 | gpu_thread.FlushRegion(addr, size); |
| 35 | } | 36 | } |
| 36 | 37 | ||
| 37 | void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { | 38 | void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { |
| 38 | gpu_thread.InvalidateRegion(addr, size); | 39 | gpu_thread.InvalidateRegion(addr, size); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| 41 | void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 42 | void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 42 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 43 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| 43 | } | 44 | } |
| 44 | 45 | ||
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 265c62758..03fd0eef0 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -27,9 +27,9 @@ public: | |||
| 27 | void Start() override; | 27 | void Start() override; |
| 28 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 28 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 29 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | 29 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 30 | void FlushRegion(CacheAddr addr, u64 size) override; | 30 | void FlushRegion(VAddr addr, u64 size) override; |
| 31 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 31 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 32 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 32 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 33 | void WaitIdle() const override; | 33 | void WaitIdle() const override; |
| 34 | 34 | ||
| 35 | protected: | 35 | protected: |
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index bd5278a5c..6f38a672a 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp | |||
| @@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 26 | renderer->SwapBuffers(framebuffer); | 26 | renderer->SwapBuffers(framebuffer); |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { | 29 | void GPUSynch::FlushRegion(VAddr addr, u64 size) { |
| 30 | renderer->Rasterizer().FlushRegion(addr, size); | 30 | renderer->Rasterizer().FlushRegion(addr, size); |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { | 33 | void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { |
| 34 | renderer->Rasterizer().InvalidateRegion(addr, size); | 34 | renderer->Rasterizer().InvalidateRegion(addr, size); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 37 | void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 38 | renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); | 38 | renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); |
| 39 | } | 39 | } |
| 40 | 40 | ||
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 866a94c8c..4a6e9a01d 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -26,9 +26,9 @@ public: | |||
| 26 | void Start() override; | 26 | void Start() override; |
| 27 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 27 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 28 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | 28 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 29 | void FlushRegion(CacheAddr addr, u64 size) override; | 29 | void FlushRegion(VAddr addr, u64 size) override; |
| 30 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 30 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 31 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 31 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 32 | void WaitIdle() const override {} | 32 | void WaitIdle() const override {} |
| 33 | 33 | ||
| 34 | protected: | 34 | protected: |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 270c7ae0d..10cda686b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 77 | PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); | 77 | PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); |
| 78 | } | 78 | } |
| 79 | 79 | ||
| 80 | void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { | 80 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { |
| 81 | PushCommand(FlushRegionCommand(addr, size)); | 81 | PushCommand(FlushRegionCommand(addr, size)); |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { | 84 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
| 85 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); | 85 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 88 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 89 | // Skip the flush in async mode; FlushAndInvalidateRegion is not used for anything critical | 89 | // Skip the flush in async mode; FlushAndInvalidateRegion is not used for anything critical |
| 90 | InvalidateRegion(addr, size); | 90 | InvalidateRegion(addr, size); |
| 91 | } | 91 | } |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index be36c580e..cd74ad330 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -47,26 +47,26 @@ struct SwapBuffersCommand final { | |||
| 47 | 47 | ||
| 48 | /// Command to signal to the GPU thread to flush a region | 48 | /// Command to signal to the GPU thread to flush a region |
| 49 | struct FlushRegionCommand final { | 49 | struct FlushRegionCommand final { |
| 50 | explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} | 50 | explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} |
| 51 | 51 | ||
| 52 | CacheAddr addr; | 52 | VAddr addr; |
| 53 | u64 size; | 53 | u64 size; |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | /// Command to signal to the GPU thread to invalidate a region | 56 | /// Command to signal to the GPU thread to invalidate a region |
| 57 | struct InvalidateRegionCommand final { | 57 | struct InvalidateRegionCommand final { |
| 58 | explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} | 58 | explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} |
| 59 | 59 | ||
| 60 | CacheAddr addr; | 60 | VAddr addr; |
| 61 | u64 size; | 61 | u64 size; |
| 62 | }; | 62 | }; |
| 63 | 63 | ||
| 64 | /// Command to signal to the GPU thread to flush and invalidate a region | 64 | /// Command to signal to the GPU thread to flush and invalidate a region |
| 65 | struct FlushAndInvalidateRegionCommand final { | 65 | struct FlushAndInvalidateRegionCommand final { |
| 66 | explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) | 66 | explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) |
| 67 | : addr{addr}, size{size} {} | 67 | : addr{addr}, size{size} {} |
| 68 | 68 | ||
| 69 | CacheAddr addr; | 69 | VAddr addr; |
| 70 | u64 size; | 70 | u64 size; |
| 71 | }; | 71 | }; |
| 72 | 72 | ||
| @@ -111,13 +111,13 @@ public: | |||
| 111 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); | 111 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); |
| 112 | 112 | ||
| 113 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 113 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 114 | void FlushRegion(CacheAddr addr, u64 size); | 114 | void FlushRegion(VAddr addr, u64 size); |
| 115 | 115 | ||
| 116 | /// Notify rasterizer that any caches of the specified region should be invalidated | 116 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 117 | void InvalidateRegion(CacheAddr addr, u64 size); | 117 | void InvalidateRegion(VAddr addr, u64 size); |
| 118 | 118 | ||
| 119 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 119 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 120 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size); | 120 | void FlushAndInvalidateRegion(VAddr addr, u64 size); |
| 121 | 121 | ||
| 122 | // Wait until the gpu thread is idle. | 122 | // Wait until the gpu thread is idle. |
| 123 | void WaitIdle() const; | 123 | void WaitIdle() const; |
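The command structs above are small PODs pushed onto a queue that the GPU thread drains. As a point of reference, a minimal standalone sketch of that producer/consumer shape; the types here are illustrative stand-ins, not the emulator's real queue machinery:

    #include <cstdint>
    #include <queue>
    #include <variant>

    using VAddr = std::uint64_t;
    using u64 = std::uint64_t;

    struct FlushRegionCommand {
        VAddr addr;
        u64 size;
    };
    struct InvalidateRegionCommand {
        VAddr addr;
        u64 size;
    };
    using CommandData = std::variant<FlushRegionCommand, InvalidateRegionCommand>;

    int main() {
        std::queue<CommandData> queue;
        // Producer (emulation) side: commands carry only the VAddr and size.
        queue.push(FlushRegionCommand{0x8000, 0x1000});
        queue.push(InvalidateRegionCommand{0x9000, 0x2000});

        // Consumer (GPU-thread) side: dispatch on whichever command is stored.
        while (!queue.empty()) {
            std::visit([](const auto& cmd) { (void)cmd.addr; (void)cmd.size; },
                       queue.front());
            queue.pop();
        }
    }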
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index f5d33f27a..a3389d0d2 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { | |||
| 81 | ASSERT((gpu_addr & page_mask) == 0); | 81 | ASSERT((gpu_addr & page_mask) == 0); |
| 82 | 82 | ||
| 83 | const u64 aligned_size{Common::AlignUp(size, page_size)}; | 83 | const u64 aligned_size{Common::AlignUp(size, page_size)}; |
| 84 | const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; | ||
| 85 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); | 84 | const auto cpu_addr = GpuToCpuAddress(gpu_addr); |
| 86 | ASSERT(cpu_addr); | 85 | ASSERT(cpu_addr); |
| 87 | 86 | ||
| 88 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. | 87 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. |
| 89 | system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); | 88 | system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size); |
| 90 | 89 | ||
| 91 | UnmapRange(gpu_addr, aligned_size); | 90 | UnmapRange(gpu_addr, aligned_size); |
| 92 | ASSERT(system.CurrentProcess() | 91 | ASSERT(system.CurrentProcess() |
| @@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const { | |||
| 140 | return {}; | 139 | return {}; |
| 141 | } | 140 | } |
| 142 | 141 | ||
| 143 | const u8* page_pointer{page_table.pointers[addr >> page_bits]}; | 142 | const u8* page_pointer{GetPointer(addr)}; |
| 144 | if (page_pointer) { | 143 | if (page_pointer) { |
| 145 | // NOTE: Avoid adding any extra logic to this fast-path block | 144 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 146 | T value; | 145 | T value; |
| 147 | std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); | 146 | std::memcpy(&value, page_pointer, sizeof(T)); |
| 148 | return value; | 147 | return value; |
| 149 | } | 148 | } |
| 150 | 149 | ||
| @@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) { | |||
| 167 | return; | 166 | return; |
| 168 | } | 167 | } |
| 169 | 168 | ||
| 170 | u8* page_pointer{page_table.pointers[addr >> page_bits]}; | 169 | u8* page_pointer{GetPointer(addr)}; |
| 171 | if (page_pointer) { | 170 | if (page_pointer) { |
| 172 | // NOTE: Avoid adding any extra logic to this fast-path block | 171 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 173 | std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); | 172 | std::memcpy(page_pointer, &data, sizeof(T)); |
| 174 | return; | 173 | return; |
| 175 | } | 174 | } |
| 176 | 175 | ||
| @@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) { | |||
| 201 | return {}; | 200 | return {}; |
| 202 | } | 201 | } |
| 203 | 202 | ||
| 204 | u8* const page_pointer{page_table.pointers[addr >> page_bits]}; | 203 | auto& memory = system.Memory(); |
| 205 | if (page_pointer != nullptr) { | 204 | |
| 206 | return page_pointer + (addr & page_mask); | 205 | const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; |
| 206 | |||
| 207 | if (page_addr != 0) { | ||
| 208 | return memory.GetPointer(page_addr + (addr & page_mask)); | ||
| 207 | } | 209 | } |
| 208 | 210 | ||
| 209 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | 211 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); |
| @@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { | |||
| 215 | return {}; | 217 | return {}; |
| 216 | } | 218 | } |
| 217 | 219 | ||
| 218 | const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; | 220 | const auto& memory = system.Memory(); |
| 219 | if (page_pointer != nullptr) { | 221 | |
| 220 | return page_pointer + (addr & page_mask); | 222 | const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; |
| 223 | |||
| 224 | if (page_addr != 0) { | ||
| 225 | return memory.GetPointer(page_addr + (addr & page_mask)); | ||
| 221 | } | 226 | } |
| 222 | 227 | ||
| 223 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | 228 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); |
| @@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s | |||
| 238 | std::size_t page_index{src_addr >> page_bits}; | 243 | std::size_t page_index{src_addr >> page_bits}; |
| 239 | std::size_t page_offset{src_addr & page_mask}; | 244 | std::size_t page_offset{src_addr & page_mask}; |
| 240 | 245 | ||
| 246 | auto& memory = system.Memory(); | ||
| 247 | |||
| 241 | while (remaining_size > 0) { | 248 | while (remaining_size > 0) { |
| 242 | const std::size_t copy_amount{ | 249 | const std::size_t copy_amount{ |
| 243 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 250 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 244 | 251 | ||
| 245 | switch (page_table.attributes[page_index]) { | 252 | switch (page_table.attributes[page_index]) { |
| 246 | case Common::PageType::Memory: { | 253 | case Common::PageType::Memory: { |
| 247 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | 254 | const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; |
| 248 | // Flush must happen on the rasterizer interface, such that memory is always synchronous | 255 | // Flush must happen on the rasterizer interface, such that memory is always synchronous |
| 249 | // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. | 256 | // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. |
| 250 | rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); | 257 | rasterizer.FlushRegion(src_addr, copy_amount); |
| 251 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 258 | memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); |
| 252 | break; | 259 | break; |
| 253 | } | 260 | } |
| 254 | default: | 261 | default: |
| @@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, | |||
| 268 | std::size_t page_index{src_addr >> page_bits}; | 275 | std::size_t page_index{src_addr >> page_bits}; |
| 269 | std::size_t page_offset{src_addr & page_mask}; | 276 | std::size_t page_offset{src_addr & page_mask}; |
| 270 | 277 | ||
| 278 | auto& memory = system.Memory(); | ||
| 279 | |||
| 271 | while (remaining_size > 0) { | 280 | while (remaining_size > 0) { |
| 272 | const std::size_t copy_amount{ | 281 | const std::size_t copy_amount{ |
| 273 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 282 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 274 | const u8* page_pointer = page_table.pointers[page_index]; | 283 | const u8* page_pointer = page_table.pointers[page_index]; |
| 275 | if (page_pointer) { | 284 | if (page_pointer) { |
| 276 | const u8* src_ptr{page_pointer + page_offset}; | 285 | const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; |
| 277 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 286 | memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); |
| 278 | } else { | 287 | } else { |
| 279 | std::memset(dest_buffer, 0, copy_amount); | 288 | std::memset(dest_buffer, 0, copy_amount); |
| 280 | } | 289 | } |
| @@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const | |||
| 290 | std::size_t page_index{dest_addr >> page_bits}; | 299 | std::size_t page_index{dest_addr >> page_bits}; |
| 291 | std::size_t page_offset{dest_addr & page_mask}; | 300 | std::size_t page_offset{dest_addr & page_mask}; |
| 292 | 301 | ||
| 302 | auto& memory = system.Memory(); | ||
| 303 | |||
| 293 | while (remaining_size > 0) { | 304 | while (remaining_size > 0) { |
| 294 | const std::size_t copy_amount{ | 305 | const std::size_t copy_amount{ |
| 295 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 306 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 296 | 307 | ||
| 297 | switch (page_table.attributes[page_index]) { | 308 | switch (page_table.attributes[page_index]) { |
| 298 | case Common::PageType::Memory: { | 309 | case Common::PageType::Memory: { |
| 299 | u8* dest_ptr{page_table.pointers[page_index] + page_offset}; | 310 | const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; |
| 300 | // Invalidate must happen on the rasterizer interface, such that memory is always | 311 | // Invalidate must happen on the rasterizer interface, such that memory is always |
| 301 | // synchronous when it is written (even when in asynchronous GPU mode). | 312 | // synchronous when it is written (even when in asynchronous GPU mode). |
| 302 | rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); | 313 | rasterizer.InvalidateRegion(dest_addr, copy_amount); |
| 303 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 314 | memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); |
| 304 | break; | 315 | break; |
| 305 | } | 316 | } |
| 306 | default: | 317 | default: |
| @@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, | |||
| 320 | std::size_t page_index{dest_addr >> page_bits}; | 331 | std::size_t page_index{dest_addr >> page_bits}; |
| 321 | std::size_t page_offset{dest_addr & page_mask}; | 332 | std::size_t page_offset{dest_addr & page_mask}; |
| 322 | 333 | ||
| 334 | auto& memory = system.Memory(); | ||
| 335 | |||
| 323 | while (remaining_size > 0) { | 336 | while (remaining_size > 0) { |
| 324 | const std::size_t copy_amount{ | 337 | const std::size_t copy_amount{ |
| 325 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 338 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 326 | u8* page_pointer = page_table.pointers[page_index]; | 339 | u8* page_pointer = page_table.pointers[page_index]; |
| 327 | if (page_pointer) { | 340 | if (page_pointer) { |
| 328 | u8* dest_ptr{page_pointer + page_offset}; | 341 | const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; |
| 329 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 342 | memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); |
| 330 | } | 343 | } |
| 331 | page_index++; | 344 | page_index++; |
| 332 | page_offset = 0; | 345 | page_offset = 0; |
| @@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, | |||
| 336 | } | 349 | } |
| 337 | 350 | ||
| 338 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | 351 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { |
| 339 | std::size_t remaining_size{size}; | 352 | std::vector<u8> tmp_buffer(size); |
| 340 | std::size_t page_index{src_addr >> page_bits}; | 353 | ReadBlock(src_addr, tmp_buffer.data(), size); |
| 341 | std::size_t page_offset{src_addr & page_mask}; | 354 | WriteBlock(dest_addr, tmp_buffer.data(), size); |
| 342 | |||
| 343 | while (remaining_size > 0) { | ||
| 344 | const std::size_t copy_amount{ | ||
| 345 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||
| 346 | |||
| 347 | switch (page_table.attributes[page_index]) { | ||
| 348 | case Common::PageType::Memory: { | ||
| 349 | // Flush must happen on the rasterizer interface, such that memory is always synchronous | ||
| 350 | // when it is copied (even when in asynchronous GPU mode). | ||
| 351 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | ||
| 352 | rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); | ||
| 353 | WriteBlock(dest_addr, src_ptr, copy_amount); | ||
| 354 | break; | ||
| 355 | } | ||
| 356 | default: | ||
| 357 | UNREACHABLE(); | ||
| 358 | } | ||
| 359 | |||
| 360 | page_index++; | ||
| 361 | page_offset = 0; | ||
| 362 | dest_addr += static_cast<VAddr>(copy_amount); | ||
| 363 | src_addr += static_cast<VAddr>(copy_amount); | ||
| 364 | remaining_size -= copy_amount; | ||
| 365 | } | ||
| 366 | } | 355 | } |
| 367 | 356 | ||
| 368 | void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | 357 | void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { |
| @@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const | |||
| 371 | WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); | 360 | WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); |
| 372 | } | 361 | } |
| 373 | 362 | ||
| 363 | bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { | ||
| 364 | const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; | ||
| 365 | const std::size_t page = (addr & Memory::PAGE_MASK) + size; | ||
| 366 | return page <= Memory::PAGE_SIZE; | ||
| 367 | } | ||
| 368 | |||
| 374 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, | 369 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, |
| 375 | VAddr backing_addr) { | 370 | VAddr backing_addr) { |
| 376 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, | 371 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, |
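ReadBlock, WriteBlock, and the old CopyBlock all share one page-walk shape: clamp each chunk at a page boundary, advance, repeat. A self-contained sketch of that loop, assuming 4 KiB pages and a hypothetical process callback:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>

    constexpr std::uint64_t PAGE_BITS = 12;
    constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;
    constexpr std::uint64_t PAGE_MASK = PAGE_SIZE - 1;

    template <typename F>
    void WalkPages(std::uint64_t addr, std::size_t size, F&& process) {
        std::size_t remaining = size;
        std::size_t page_index = static_cast<std::size_t>(addr >> PAGE_BITS);
        std::size_t page_offset = static_cast<std::size_t>(addr & PAGE_MASK);
        while (remaining > 0) {
            // Each chunk is clamped so it never crosses a page boundary.
            const std::size_t amount =
                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining);
            process(page_index, page_offset, amount);
            ++page_index;
            page_offset = 0;
            remaining -= amount;
        }
    }

    int main() {
        // A 0x2800-byte span starting at offset 0x800 touches three pages.
        WalkPages(0x800, 0x2800, [](std::size_t, std::size_t, std::size_t) {});
    }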
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 073bdb491..0d9468535 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -97,6 +97,11 @@ public: | |||
| 97 | void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); | 97 | void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); |
| 98 | void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); | 98 | void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); |
| 99 | 99 | ||
| 100 | /** | ||
| 101 | * IsGranularRange checks whether a GPU region can be read with a single host pointer, i.e. it does not cross a page boundary | ||
| 102 | */ | ||
| 103 | bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); | ||
| 104 | |||
| 100 | private: | 105 | private: |
| 101 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; | 106 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; |
| 102 | using VMAHandle = VMAMap::const_iterator; | 107 | using VMAHandle = VMAMap::const_iterator; |
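The IsGranularRange declaration above boils down to one piece of address arithmetic: a range is "granular" when its offset within the page plus its size stays inside a single page, so one host pointer covers all of it and no page walk is needed. A standalone sketch of just that check, assuming 4 KiB pages:

    #include <cstddef>
    #include <cstdint>

    constexpr std::uint64_t PAGE_SIZE = 0x1000;
    constexpr std::uint64_t PAGE_MASK = PAGE_SIZE - 1;

    // True when [cpu_addr, cpu_addr + size) fits inside one page.
    bool IsGranular(std::uint64_t cpu_addr, std::size_t size) {
        return (cpu_addr & PAGE_MASK) + size <= PAGE_SIZE;
    }

    int main() {
        const bool whole_page = IsGranular(0x1000, 0x1000); // true: exactly one page
        const bool straddles = IsGranular(0x1800, 0x1000);  // false: crosses 0x2000
        (void)whole_page;
        (void)straddles;
    }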
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index e66054ed0..5ea2b01f2 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -98,12 +98,12 @@ public: | |||
| 98 | static_cast<QueryCache&>(*this), | 98 | static_cast<QueryCache&>(*this), |
| 99 | VideoCore::QueryType::SamplesPassed}}} {} | 99 | VideoCore::QueryType::SamplesPassed}}} {} |
| 100 | 100 | ||
| 101 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | 101 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| 102 | std::unique_lock lock{mutex}; | 102 | std::unique_lock lock{mutex}; |
| 103 | FlushAndRemoveRegion(addr, size); | 103 | FlushAndRemoveRegion(addr, size); |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | void FlushRegion(CacheAddr addr, std::size_t size) { | 106 | void FlushRegion(VAddr addr, std::size_t size) { |
| 107 | std::unique_lock lock{mutex}; | 107 | std::unique_lock lock{mutex}; |
| 108 | FlushAndRemoveRegion(addr, size); | 108 | FlushAndRemoveRegion(addr, size); |
| 109 | } | 109 | } |
| @@ -117,14 +117,16 @@ public: | |||
| 117 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { | 117 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { |
| 118 | std::unique_lock lock{mutex}; | 118 | std::unique_lock lock{mutex}; |
| 119 | auto& memory_manager = system.GPU().MemoryManager(); | 119 | auto& memory_manager = system.GPU().MemoryManager(); |
| 120 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 120 | const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); |
| 121 | ASSERT(cpu_addr_opt); | ||
| 122 | VAddr cpu_addr = *cpu_addr_opt; | ||
| 121 | 123 | ||
| 122 | CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); | 124 | CachedQuery* query = TryGet(cpu_addr); |
| 123 | if (!query) { | 125 | if (!query) { |
| 124 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | 126 | ASSERT_OR_EXECUTE(cpu_addr_opt, return;); |
| 125 | ASSERT_OR_EXECUTE(cpu_addr, return;); | 127 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
| 126 | 128 | ||
| 127 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | 129 | query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); |
| 128 | } | 130 | } |
| 129 | 131 | ||
| 130 | query->BindCounter(Stream(type).Current(), timestamp); | 132 | query->BindCounter(Stream(type).Current(), timestamp); |
| @@ -173,11 +175,11 @@ protected: | |||
| 173 | 175 | ||
| 174 | private: | 176 | private: |
| 175 | /// Flushes a memory range to guest memory and removes it from the cache. | 177 | /// Flushes a memory range to guest memory and removes it from the cache. |
| 176 | void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { | 178 | void FlushAndRemoveRegion(VAddr addr, std::size_t size) { |
| 177 | const u64 addr_begin = static_cast<u64>(addr); | 179 | const u64 addr_begin = static_cast<u64>(addr); |
| 178 | const u64 addr_end = addr_begin + static_cast<u64>(size); | 180 | const u64 addr_end = addr_begin + static_cast<u64>(size); |
| 179 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | 181 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { |
| 180 | const u64 cache_begin = query.GetCacheAddr(); | 182 | const u64 cache_begin = query.GetCpuAddr(); |
| 181 | const u64 cache_end = cache_begin + query.SizeInBytes(); | 183 | const u64 cache_end = cache_begin + query.SizeInBytes(); |
| 182 | return cache_begin < addr_end && addr_begin < cache_end; | 184 | return cache_begin < addr_end && addr_begin < cache_end; |
| 183 | }; | 185 | }; |
| @@ -193,7 +195,7 @@ private: | |||
| 193 | if (!in_range(query)) { | 195 | if (!in_range(query)) { |
| 194 | continue; | 196 | continue; |
| 195 | } | 197 | } |
| 196 | rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); | 198 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); |
| 197 | query.Flush(); | 199 | query.Flush(); |
| 198 | } | 200 | } |
| 199 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | 201 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), |
| @@ -204,22 +206,21 @@ private: | |||
| 204 | /// Registers the passed parameters as cached and returns a pointer to the stored cached query. | 206 | /// Registers the passed parameters as cached and returns a pointer to the stored cached query. |
| 205 | CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { | 207 | CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { |
| 206 | rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); | 208 | rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); |
| 207 | const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; | 209 | const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT; |
| 208 | return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, | 210 | return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, |
| 209 | host_ptr); | 211 | host_ptr); |
| 210 | } | 212 | } |
| 211 | 213 | ||
| 212 | /// Tries to get a cached query. Returns nullptr on failure. | 214 | /// Tries to get a cached query. Returns nullptr on failure. |
| 213 | CachedQuery* TryGet(CacheAddr addr) { | 215 | CachedQuery* TryGet(VAddr addr) { |
| 214 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | 216 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; |
| 215 | const auto it = cached_queries.find(page); | 217 | const auto it = cached_queries.find(page); |
| 216 | if (it == std::end(cached_queries)) { | 218 | if (it == std::end(cached_queries)) { |
| 217 | return nullptr; | 219 | return nullptr; |
| 218 | } | 220 | } |
| 219 | auto& contents = it->second; | 221 | auto& contents = it->second; |
| 220 | const auto found = | 222 | const auto found = std::find_if(std::begin(contents), std::end(contents), |
| 221 | std::find_if(std::begin(contents), std::end(contents), | 223 | [addr](auto& query) { return query.GetCpuAddr() == addr; }); |
| 222 | [addr](auto& query) { return query.GetCacheAddr() == addr; }); | ||
| 223 | return found != std::end(contents) ? &*found : nullptr; | 224 | return found != std::end(contents) ? &*found : nullptr; |
| 224 | } | 225 | } |
| 225 | 226 | ||
| @@ -323,14 +324,10 @@ public: | |||
| 323 | timestamp = timestamp_; | 324 | timestamp = timestamp_; |
| 324 | } | 325 | } |
| 325 | 326 | ||
| 326 | VAddr CpuAddr() const noexcept { | 327 | VAddr GetCpuAddr() const noexcept { |
| 327 | return cpu_addr; | 328 | return cpu_addr; |
| 328 | } | 329 | } |
| 329 | 330 | ||
| 330 | CacheAddr GetCacheAddr() const noexcept { | ||
| 331 | return ToCacheAddr(host_ptr); | ||
| 332 | } | ||
| 333 | |||
| 334 | u64 SizeInBytes() const noexcept { | 331 | u64 SizeInBytes() const noexcept { |
| 335 | return SizeInBytes(timestamp.has_value()); | 332 | return SizeInBytes(timestamp.has_value()); |
| 336 | } | 333 | } |
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 6de1597a2..22987751e 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h | |||
| @@ -18,22 +18,14 @@ | |||
| 18 | 18 | ||
| 19 | class RasterizerCacheObject { | 19 | class RasterizerCacheObject { |
| 20 | public: | 20 | public: |
| 21 | explicit RasterizerCacheObject(const u8* host_ptr) | 21 | explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} |
| 22 | : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} | ||
| 23 | 22 | ||
| 24 | virtual ~RasterizerCacheObject(); | 23 | virtual ~RasterizerCacheObject(); |
| 25 | 24 | ||
| 26 | CacheAddr GetCacheAddr() const { | 25 | VAddr GetCpuAddr() const { |
| 27 | return cache_addr; | 26 | return cpu_addr; |
| 28 | } | 27 | } |
| 29 | 28 | ||
| 30 | const u8* GetHostPtr() const { | ||
| 31 | return host_ptr; | ||
| 32 | } | ||
| 33 | |||
| 34 | /// Gets the address of the shader in guest memory, required for cache management | ||
| 35 | virtual VAddr GetCpuAddr() const = 0; | ||
| 36 | |||
| 37 | /// Gets the size of the shader in guest memory, required for cache management | 29 | /// Gets the size of the shader in guest memory, required for cache management |
| 38 | virtual std::size_t GetSizeInBytes() const = 0; | 30 | virtual std::size_t GetSizeInBytes() const = 0; |
| 39 | 31 | ||
| @@ -68,8 +60,7 @@ private: | |||
| 68 | bool is_registered{}; ///< Whether the object is currently registered with the cache | 60 | bool is_registered{}; ///< Whether the object is currently registered with the cache |
| 69 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) | 61 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) |
| 70 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing | 62 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing |
| 71 | const u8* host_ptr{}; ///< Pointer to the memory backing this cached region | 63 | VAddr cpu_addr{}; ///< Guest CPU virtual address of this cached region |
| 72 | CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space | ||
| 73 | }; | 64 | }; |
| 74 | 65 | ||
| 75 | template <class T> | 66 | template <class T> |
| @@ -80,7 +71,7 @@ public: | |||
| 80 | explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | 71 | explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |
| 81 | 72 | ||
| 82 | /// Write any cached resources overlapping the specified region back to memory | 73 | /// Write any cached resources overlapping the specified region back to memory |
| 83 | void FlushRegion(CacheAddr addr, std::size_t size) { | 74 | void FlushRegion(VAddr addr, std::size_t size) { |
| 84 | std::lock_guard lock{mutex}; | 75 | std::lock_guard lock{mutex}; |
| 85 | 76 | ||
| 86 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | 77 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |
| @@ -90,7 +81,7 @@ public: | |||
| 90 | } | 81 | } |
| 91 | 82 | ||
| 92 | /// Mark the specified region as being invalidated | 83 | /// Mark the specified region as being invalidated |
| 93 | void InvalidateRegion(CacheAddr addr, u64 size) { | 84 | void InvalidateRegion(VAddr addr, u64 size) { |
| 94 | std::lock_guard lock{mutex}; | 85 | std::lock_guard lock{mutex}; |
| 95 | 86 | ||
| 96 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | 87 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |
| @@ -114,27 +105,20 @@ public: | |||
| 114 | 105 | ||
| 115 | protected: | 106 | protected: |
| 116 | /// Tries to get an object from the cache with the specified cache address | 107 | /// Tries to get an object from the cache with the specified cache address |
| 117 | T TryGet(CacheAddr addr) const { | 108 | T TryGet(VAddr addr) const { |
| 118 | const auto iter = map_cache.find(addr); | 109 | const auto iter = map_cache.find(addr); |
| 119 | if (iter != map_cache.end()) | 110 | if (iter != map_cache.end()) |
| 120 | return iter->second; | 111 | return iter->second; |
| 121 | return nullptr; | 112 | return nullptr; |
| 122 | } | 113 | } |
| 123 | 114 | ||
| 124 | T TryGet(const void* addr) const { | ||
| 125 | const auto iter = map_cache.find(ToCacheAddr(addr)); | ||
| 126 | if (iter != map_cache.end()) | ||
| 127 | return iter->second; | ||
| 128 | return nullptr; | ||
| 129 | } | ||
| 130 | |||
| 131 | /// Register an object into the cache | 115 | /// Register an object into the cache |
| 132 | virtual void Register(const T& object) { | 116 | virtual void Register(const T& object) { |
| 133 | std::lock_guard lock{mutex}; | 117 | std::lock_guard lock{mutex}; |
| 134 | 118 | ||
| 135 | object->SetIsRegistered(true); | 119 | object->SetIsRegistered(true); |
| 136 | interval_cache.add({GetInterval(object), ObjectSet{object}}); | 120 | interval_cache.add({GetInterval(object), ObjectSet{object}}); |
| 137 | map_cache.insert({object->GetCacheAddr(), object}); | 121 | map_cache.insert({object->GetCpuAddr(), object}); |
| 138 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); | 122 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); |
| 139 | } | 123 | } |
| 140 | 124 | ||
| @@ -144,7 +128,7 @@ protected: | |||
| 144 | 128 | ||
| 145 | object->SetIsRegistered(false); | 129 | object->SetIsRegistered(false); |
| 146 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | 130 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); |
| 147 | const CacheAddr addr = object->GetCacheAddr(); | 131 | const VAddr addr = object->GetCpuAddr(); |
| 148 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); | 132 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); |
| 149 | map_cache.erase(addr); | 133 | map_cache.erase(addr); |
| 150 | } | 134 | } |
| @@ -173,7 +157,7 @@ protected: | |||
| 173 | 157 | ||
| 174 | private: | 158 | private: |
| 175 | /// Returns a list of cached objects from the specified memory region, ordered by access time | 159 | /// Returns a list of cached objects from the specified memory region, ordered by access time |
| 176 | std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { | 160 | std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { |
| 177 | if (size == 0) { | 161 | if (size == 0) { |
| 178 | return {}; | 162 | return {}; |
| 179 | } | 163 | } |
| @@ -197,13 +181,13 @@ private: | |||
| 197 | } | 181 | } |
| 198 | 182 | ||
| 199 | using ObjectSet = std::set<T>; | 183 | using ObjectSet = std::set<T>; |
| 200 | using ObjectCache = std::unordered_map<CacheAddr, T>; | 184 | using ObjectCache = std::unordered_map<VAddr, T>; |
| 201 | using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; | 185 | using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; |
| 202 | using ObjectInterval = typename IntervalCache::interval_type; | 186 | using ObjectInterval = typename IntervalCache::interval_type; |
| 203 | 187 | ||
| 204 | static auto GetInterval(const T& object) { | 188 | static auto GetInterval(const T& object) { |
| 205 | return ObjectInterval::right_open(object->GetCacheAddr(), | 189 | return ObjectInterval::right_open(object->GetCpuAddr(), |
| 206 | object->GetCacheAddr() + object->GetSizeInBytes()); | 190 | object->GetCpuAddr() + object->GetSizeInBytes()); |
| 207 | } | 191 | } |
| 208 | 192 | ||
| 209 | ObjectCache map_cache; | 193 | ObjectCache map_cache; |
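The cache above keys a boost::icl::interval_map by VAddr and registers every object under a right-open [addr, addr + size) interval, so region queries reduce to interval intersection. A minimal sketch of that Boost.ICL usage with a toy integer payload, assuming ICL's usual add and operator& intersection semantics:

    #include <cstdint>
    #include <set>

    #include <boost/icl/interval_map.hpp>

    using VAddr = std::uint64_t;
    using ObjectSet = std::set<int>; // toy payload instead of cached objects
    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;

    int main() {
        IntervalCache cache;
        cache.add({IntervalCache::interval_type::right_open(0x1000, 0x1800), ObjectSet{1}});
        cache.add({IntervalCache::interval_type::right_open(0x2000, 0x2400), ObjectSet{2}});

        // Intersecting with a query window yields only the overlapping entries.
        const auto window = IntervalCache::interval_type::right_open(0x1400, 0x2100);
        for (const auto& [interval, objects] : cache & window) {
            (void)interval;
            (void)objects; // first iteration sees {1}, second sees {2}
        }
    }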
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1a68e3caa..8ae5b9c4e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -53,14 +53,14 @@ public: | |||
| 53 | virtual void FlushAll() = 0; | 53 | virtual void FlushAll() = 0; |
| 54 | 54 | ||
| 55 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 55 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 56 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; | 56 | virtual void FlushRegion(VAddr addr, u64 size) = 0; |
| 57 | 57 | ||
| 58 | /// Notify rasterizer that any caches of the specified region should be invalidated | 58 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 59 | virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; | 59 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |
| 60 | 60 | ||
| 61 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 61 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 62 | /// and invalidated | 62 | /// and invalidated |
| 63 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 63 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
| 64 | 64 | ||
| 65 | /// Notify the rasterizer to send all written commands to the host GPU. | 65 | /// Notify the rasterizer to send all written commands to the host GPU. |
| 66 | virtual void FlushCommands() = 0; | 66 | virtual void FlushCommands() = 0; |
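To make the interface contract concrete: FlushRegion writes cached data back to guest memory, InvalidateRegion discards it so the next access re-reads guest memory, and FlushAndInvalidateRegion does both in that order. A toy model of those semantics, with stand-in types and deliberately simplified exact-address entries:

    #include <algorithm>
    #include <cstdint>
    #include <cstring>
    #include <map>
    #include <vector>

    using VAddr = std::uint64_t;

    struct ToyCache {
        std::vector<std::uint8_t>& guest;                   // stand-in guest RAM
        std::map<VAddr, std::vector<std::uint8_t>> entries; // addr -> cached bytes

        // Write cached bytes back so guest reads see up-to-date data.
        void FlushRegion(VAddr addr, std::uint64_t size) {
            const auto it = entries.find(addr);
            if (it == entries.end()) {
                return;
            }
            const std::size_t amount = std::min<std::size_t>(size, it->second.size());
            std::memcpy(guest.data() + addr, it->second.data(), amount);
        }

        // Drop the cached copy; the next use must re-read guest memory.
        void InvalidateRegion(VAddr addr, std::uint64_t) {
            entries.erase(addr);
        }

        void FlushAndInvalidateRegion(VAddr addr, std::uint64_t size) {
            FlushRegion(addr, size);
            InvalidateRegion(addr, size);
        }
    };

    int main() {
        std::vector<std::uint8_t> guest(0x100);
        ToyCache cache{guest};
        cache.entries[0x10] = {1, 2, 3};
        cache.FlushAndInvalidateRegion(0x10, 3); // guest[0x10..0x12] = 1,2,3
    }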
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0375fca17..4eb37a96c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 21 | 21 | ||
| 22 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 22 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 23 | 23 | ||
| 24 | CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) | 24 | CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) |
| 25 | : VideoCommon::BufferBlock{cache_addr, size} { | 25 | : VideoCommon::BufferBlock{cpu_addr, size} { |
| 26 | gl_buffer.Create(); | 26 | gl_buffer.Create(); |
| 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 28 | } | 28 | } |
| @@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() { | |||
| 47 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 47 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { | 50 | Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 51 | return std::make_shared<CachedBufferBlock>(cache_addr, size); | 51 | return std::make_shared<CachedBufferBlock>(cpu_addr, size); |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | void OGLBufferCache::WriteBarrier() { | 54 | void OGLBufferCache::WriteBarrier() { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c7145443..d94a11252 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf | |||
| 31 | 31 | ||
| 32 | class CachedBufferBlock : public VideoCommon::BufferBlock { | 32 | class CachedBufferBlock : public VideoCommon::BufferBlock { |
| 33 | public: | 33 | public: |
| 34 | explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); | 34 | explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); |
| 35 | ~CachedBufferBlock(); | 35 | ~CachedBufferBlock(); |
| 36 | 36 | ||
| 37 | const GLuint* GetHandle() const { | 37 | const GLuint* GetHandle() const { |
| @@ -55,7 +55,7 @@ public: | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | protected: | 57 | protected: |
| 58 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; | 58 | Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 59 | 59 | ||
| 60 | void WriteBarrier() override; | 60 | void WriteBarrier() override; |
| 61 | 61 | ||
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1a2e2a9f7..c286502ba 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -131,6 +131,31 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 131 | return bindings; | 131 | return bindings; |
| 132 | } | 132 | } |
| 133 | 133 | ||
| 134 | bool IsASTCSupported() { | ||
| 135 | static constexpr std::array formats = { | ||
| 136 | GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, | ||
| 137 | GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, | ||
| 138 | GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, | ||
| 139 | GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR, | ||
| 140 | GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR, | ||
| 141 | GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR, | ||
| 142 | GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR, | ||
| 143 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, | ||
| 144 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, | ||
| 145 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, | ||
| 146 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, | ||
| 147 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, | ||
| 148 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, | ||
| 149 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, | ||
| 150 | }; | ||
| 151 | return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) { | ||
| 152 | GLint supported; | ||
| 153 | glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, | ||
| 154 | &supported); | ||
| 155 | return supported == GL_TRUE; | ||
| 156 | }) == formats.end(); | ||
| 157 | } | ||
| 158 | |||
| 134 | } // Anonymous namespace | 159 | } // Anonymous namespace |
| 135 | 160 | ||
| 136 | Device::Device() : base_bindings{BuildBaseBindings()} { | 161 | Device::Device() : base_bindings{BuildBaseBindings()} { |
| @@ -152,6 +177,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} { | |||
| 152 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; | 177 | has_shader_ballot = GLAD_GL_ARB_shader_ballot; |
| 153 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; | 178 | has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; |
| 154 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); | 179 | has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); |
| 180 | has_astc = IsASTCSupported(); | ||
| 155 | has_variable_aoffi = TestVariableAoffi(); | 181 | has_variable_aoffi = TestVariableAoffi(); |
| 156 | has_component_indexing_bug = is_amd; | 182 | has_component_indexing_bug = is_amd; |
| 157 | has_precise_bug = TestPreciseBug(); | 183 | has_precise_bug = TestPreciseBug(); |
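IsASTCSupported above reports native ASTC support only when every listed format passes the GL_INTERNALFORMAT_SUPPORTED query; when the probe fails, has_astc presumably steers the texture cache toward a CPU decode fallback. The per-format probe in isolation, assuming a current core-profile context with glad already loaded:

    #include <glad/glad.h>

    // GL_TRUE means the driver can sample this internal format natively.
    bool SupportsFormat(GLenum format) {
        GLint supported = GL_FALSE;
        glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1,
                              &supported);
        return supported == GL_TRUE;
    }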
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index d73b099d0..a55050cb5 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -64,6 +64,10 @@ public: | |||
| 64 | return has_image_load_formatted; | 64 | return has_image_load_formatted; |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | bool HasASTC() const { | ||
| 68 | return has_astc; | ||
| 69 | } | ||
| 70 | |||
| 67 | bool HasVariableAoffi() const { | 71 | bool HasVariableAoffi() const { |
| 68 | return has_variable_aoffi; | 72 | return has_variable_aoffi; |
| 69 | } | 73 | } |
| @@ -97,6 +101,7 @@ private: | |||
| 97 | bool has_shader_ballot{}; | 101 | bool has_shader_ballot{}; |
| 98 | bool has_vertex_viewport_layer{}; | 102 | bool has_vertex_viewport_layer{}; |
| 99 | bool has_image_load_formatted{}; | 103 | bool has_image_load_formatted{}; |
| 104 | bool has_astc{}; | ||
| 100 | bool has_variable_aoffi{}; | 105 | bool has_variable_aoffi{}; |
| 101 | bool has_component_indexing_bug{}; | 106 | bool has_component_indexing_bug{}; |
| 102 | bool has_precise_bug{}; | 107 | bool has_precise_bug{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 31add708f..f4598fbf7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -140,8 +140,8 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 140 | const auto attrib = gpu.regs.vertex_attrib_format[index]; | 140 | const auto attrib = gpu.regs.vertex_attrib_format[index]; |
| 141 | const auto gl_index = static_cast<GLuint>(index); | 141 | const auto gl_index = static_cast<GLuint>(index); |
| 142 | 142 | ||
| 143 | // Ignore invalid attributes. | 143 | // Disable constant attributes. |
| 144 | if (!attrib.IsValid()) { | 144 | if (attrib.IsConstant()) { |
| 145 | glDisableVertexAttribArray(gl_index); | 145 | glDisableVertexAttribArray(gl_index); |
| 146 | continue; | 146 | continue; |
| 147 | } | 147 | } |
| @@ -345,7 +345,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { | |||
| 345 | 345 | ||
| 346 | texture_cache.GuardRenderTargets(true); | 346 | texture_cache.GuardRenderTargets(true); |
| 347 | 347 | ||
| 348 | View depth_surface = texture_cache.GetDepthBufferSurface(true); | 348 | View depth_surface = texture_cache.GetDepthBufferSurface(); |
| 349 | 349 | ||
| 350 | const auto& regs = gpu.regs; | 350 | const auto& regs = gpu.regs; |
| 351 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); | 351 | UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); |
| @@ -354,7 +354,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { | |||
| 354 | FramebufferCacheKey key; | 354 | FramebufferCacheKey key; |
| 355 | const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); | 355 | const auto colors_count = static_cast<std::size_t>(regs.rt_control.count); |
| 356 | for (std::size_t index = 0; index < colors_count; ++index) { | 356 | for (std::size_t index = 0; index < colors_count; ++index) { |
| 357 | View color_surface{texture_cache.GetColorBufferSurface(index, true)}; | 357 | View color_surface{texture_cache.GetColorBufferSurface(index)}; |
| 358 | if (!color_surface) { | 358 | if (!color_surface) { |
| 359 | continue; | 359 | continue; |
| 360 | } | 360 | } |
| @@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using | |||
| 386 | texture_cache.GuardRenderTargets(true); | 386 | texture_cache.GuardRenderTargets(true); |
| 387 | View color_surface; | 387 | View color_surface; |
| 388 | if (using_color_fb) { | 388 | if (using_color_fb) { |
| 389 | color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); | 389 | const std::size_t index = regs.clear_buffers.RT; |
| 390 | color_surface = texture_cache.GetColorBufferSurface(index); | ||
| 391 | texture_cache.MarkColorBufferInUse(index); | ||
| 390 | } | 392 | } |
| 391 | View depth_surface; | 393 | View depth_surface; |
| 392 | if (using_depth_fb || using_stencil_fb) { | 394 | if (using_depth_fb || using_stencil_fb) { |
| 393 | depth_surface = texture_cache.GetDepthBufferSurface(false); | 395 | depth_surface = texture_cache.GetDepthBufferSurface(); |
| 396 | texture_cache.MarkDepthBufferInUse(); | ||
| 394 | } | 397 | } |
| 395 | texture_cache.GuardRenderTargets(false); | 398 | texture_cache.GuardRenderTargets(false); |
| 396 | 399 | ||
| @@ -493,6 +496,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 493 | SyncPrimitiveRestart(); | 496 | SyncPrimitiveRestart(); |
| 494 | SyncScissorTest(); | 497 | SyncScissorTest(); |
| 495 | SyncPointState(); | 498 | SyncPointState(); |
| 499 | SyncLineState(); | ||
| 496 | SyncPolygonOffset(); | 500 | SyncPolygonOffset(); |
| 497 | SyncAlphaTest(); | 501 | SyncAlphaTest(); |
| 498 | SyncFramebufferSRGB(); | 502 | SyncFramebufferSRGB(); |
| @@ -653,9 +657,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 653 | 657 | ||
| 654 | void RasterizerOpenGL::FlushAll() {} | 658 | void RasterizerOpenGL::FlushAll() {} |
| 655 | 659 | ||
| 656 | void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | 660 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { |
| 657 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 661 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 658 | if (!addr || !size) { | 662 | if (addr == 0 || size == 0) { |
| 659 | return; | 663 | return; |
| 660 | } | 664 | } |
| 661 | texture_cache.FlushRegion(addr, size); | 665 | texture_cache.FlushRegion(addr, size); |
| @@ -663,9 +667,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 663 | query_cache.FlushRegion(addr, size); | 667 | query_cache.FlushRegion(addr, size); |
| 664 | } | 668 | } |
| 665 | 669 | ||
| 666 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | 670 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| 667 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 671 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 668 | if (!addr || !size) { | 672 | if (addr == 0 || size == 0) { |
| 669 | return; | 673 | return; |
| 670 | } | 674 | } |
| 671 | texture_cache.InvalidateRegion(addr, size); | 675 | texture_cache.InvalidateRegion(addr, size); |
| @@ -674,7 +678,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 674 | query_cache.InvalidateRegion(addr, size); | 678 | query_cache.InvalidateRegion(addr, size); |
| 675 | } | 679 | } |
| 676 | 680 | ||
| 677 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 681 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 678 | if (Settings::values.use_accurate_gpu_emulation) { | 682 | if (Settings::values.use_accurate_gpu_emulation) { |
| 679 | FlushRegion(addr, size); | 683 | FlushRegion(addr, size); |
| 680 | } | 684 | } |
| @@ -713,8 +717,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 713 | 717 | ||
| 714 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 718 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 715 | 719 | ||
| 716 | const auto surface{ | 720 | const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; |
| 717 | texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))}; | ||
| 718 | if (!surface) { | 721 | if (!surface) { |
| 719 | return {}; | 722 | return {}; |
| 720 | } | 723 | } |
| @@ -1309,6 +1312,19 @@ void RasterizerOpenGL::SyncPointState() { | |||
| 1309 | glDisable(GL_PROGRAM_POINT_SIZE); | 1312 | glDisable(GL_PROGRAM_POINT_SIZE); |
| 1310 | } | 1313 | } |
| 1311 | 1314 | ||
| 1315 | void RasterizerOpenGL::SyncLineState() { | ||
| 1316 | auto& gpu = system.GPU().Maxwell3D(); | ||
| 1317 | auto& flags = gpu.dirty.flags; | ||
| 1318 | if (!flags[Dirty::LineWidth]) { | ||
| 1319 | return; | ||
| 1320 | } | ||
| 1321 | flags[Dirty::LineWidth] = false; | ||
| 1322 | |||
| 1323 | const auto& regs = gpu.regs; | ||
| 1324 | oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable); | ||
| 1325 | glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased); | ||
| 1326 | } | ||
| 1327 | |||
| 1312 | void RasterizerOpenGL::SyncPolygonOffset() { | 1328 | void RasterizerOpenGL::SyncPolygonOffset() { |
| 1313 | auto& gpu = system.GPU().Maxwell3D(); | 1329 | auto& gpu = system.GPU().Maxwell3D(); |
| 1314 | auto& flags = gpu.dirty.flags; | 1330 | auto& flags = gpu.dirty.flags; |
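The new SyncLineState follows the same dirty-flag pattern as the other Sync* functions: bail out early when the flag is clear, consume the flag, then re-apply guest register state to the host API. A self-contained sketch of that pattern; the names (Dirty, Regs, Gpu) are illustrative stand-ins, not the emulator's real types:

    #include <bitset>
    #include <cstdio>

    enum Dirty { LineWidth, Count };

    struct Regs {
        bool line_smooth_enable = true;
        float line_width_smooth = 2.0f;
        float line_width_aliased = 1.0f;
    };

    struct Gpu {
        std::bitset<Dirty::Count> dirty_flags;
        Regs regs;
    };

    void SyncLineState(Gpu& gpu) {
        if (!gpu.dirty_flags[Dirty::LineWidth]) {
            return; // Host state already matches guest state.
        }
        gpu.dirty_flags[Dirty::LineWidth] = false; // Consume before re-applying.
        const float width = gpu.regs.line_smooth_enable ? gpu.regs.line_width_smooth
                                                        : gpu.regs.line_width_aliased;
        std::printf("glLineWidth(%f)\n", width);
    }

    int main() {
        Gpu gpu;
        gpu.dirty_flags[Dirty::LineWidth] = true; // A register write sets the flag.
        SyncLineState(gpu); // Applies the state once...
        SyncLineState(gpu); // ...then no-ops until the flag is set again.
    }

Consuming the flag before touching the host API keeps redundant state changes off the driver, which is the point of tracking dirtiness per register group.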
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2d3be2437..435da4425 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -65,9 +65,9 @@ public: | |||
| 65 | void ResetCounter(VideoCore::QueryType type) override; | 65 | void ResetCounter(VideoCore::QueryType type) override; |
| 66 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 66 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 67 | void FlushAll() override; | 67 | void FlushAll() override; |
| 68 | void FlushRegion(CacheAddr addr, u64 size) override; | 68 | void FlushRegion(VAddr addr, u64 size) override; |
| 69 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 69 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 70 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 70 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 71 | void FlushCommands() override; | 71 | void FlushCommands() override; |
| 72 | void TickFrame() override; | 72 | void TickFrame() override; |
| 73 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 73 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| @@ -171,6 +171,9 @@ private: | |||
| 171 | /// Syncs the point state to match the guest state | 171 | /// Syncs the point state to match the guest state |
| 172 | void SyncPointState(); | 172 | void SyncPointState(); |
| 173 | 173 | ||
| 174 | /// Syncs the line state to match the guest state | ||
| 175 | void SyncLineState(); | ||
| 176 | |||
| 174 | /// Syncs the rasterizer enable state to match the guest state | 177 | /// Syncs the rasterizer enable state to match the guest state |
| 175 | void SyncRasterizeEnable(); | 178 | void SyncRasterizeEnable(); |
| 176 | 179 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 046ee55a5..12c6dcfde 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -34,6 +34,8 @@ | |||
| 34 | namespace OpenGL { | 34 | namespace OpenGL { |
| 35 | 35 | ||
| 36 | using Tegra::Engines::ShaderType; | 36 | using Tegra::Engines::ShaderType; |
| 37 | using VideoCommon::Shader::CompileDepth; | ||
| 38 | using VideoCommon::Shader::CompilerSettings; | ||
| 37 | using VideoCommon::Shader::ProgramCode; | 39 | using VideoCommon::Shader::ProgramCode; |
| 38 | using VideoCommon::Shader::Registry; | 40 | using VideoCommon::Shader::Registry; |
| 39 | using VideoCommon::Shader::ShaderIR; | 41 | using VideoCommon::Shader::ShaderIR; |
| @@ -43,7 +45,7 @@ namespace { | |||
| 43 | constexpr u32 STAGE_MAIN_OFFSET = 10; | 45 | constexpr u32 STAGE_MAIN_OFFSET = 10; |
| 44 | constexpr u32 KERNEL_MAIN_OFFSET = 0; | 46 | constexpr u32 KERNEL_MAIN_OFFSET = 0; |
| 45 | 47 | ||
| 46 | constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; | 48 | constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile}; |
| 47 | 49 | ||
| 48 | /// Gets the address for the specified shader stage program | 50 | /// Gets the address for the specified shader stage program |
| 49 | GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { | 51 | GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { |
| @@ -214,11 +216,11 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 214 | 216 | ||
| 215 | } // Anonymous namespace | 217 | } // Anonymous namespace |
| 216 | 218 | ||
| 217 | CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, | 219 | CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, |
| 218 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 220 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 219 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) | 221 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) |
| 220 | : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, | 222 | : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, |
| 221 | cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} | 223 | size_in_bytes{size_in_bytes}, program{std::move(program)} {} |
| 222 | 224 | ||
| 223 | CachedShader::~CachedShader() = default; | 225 | CachedShader::~CachedShader() = default; |
| 224 | 226 | ||
| @@ -254,9 +256,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 254 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 256 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 255 | params.disk_cache.SaveEntry(std::move(entry)); | 257 | params.disk_cache.SaveEntry(std::move(entry)); |
| 256 | 258 | ||
| 257 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | 259 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 258 | size_in_bytes, std::move(registry), | 260 | params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); |
| 259 | MakeEntries(ir), std::move(program))); | ||
| 260 | } | 261 | } |
| 261 | 262 | ||
| 262 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 263 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| @@ -279,17 +280,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog | |||
| 279 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 280 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 280 | params.disk_cache.SaveEntry(std::move(entry)); | 281 | params.disk_cache.SaveEntry(std::move(entry)); |
| 281 | 282 | ||
| 282 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | 283 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 283 | size_in_bytes, std::move(registry), | 284 | params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); |
| 284 | MakeEntries(ir), std::move(program))); | ||
| 285 | } | 285 | } |
| 286 | 286 | ||
| 287 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 287 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 288 | const PrecompiledShader& precompiled_shader, | 288 | const PrecompiledShader& precompiled_shader, |
| 289 | std::size_t size_in_bytes) { | 289 | std::size_t size_in_bytes) { |
| 290 | return std::shared_ptr<CachedShader>(new CachedShader( | 290 | return std::shared_ptr<CachedShader>( |
| 291 | params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, | 291 | new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, |
| 292 | precompiled_shader.entries, precompiled_shader.program)); | 292 | precompiled_shader.entries, precompiled_shader.program)); |
| 293 | } | 293 | } |
| 294 | 294 | ||
| 295 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 295 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -449,12 +449,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 449 | const GPUVAddr address{GetShaderAddress(system, program)}; | 449 | const GPUVAddr address{GetShaderAddress(system, program)}; |
| 450 | 450 | ||
| 451 | // Look up shader in the cache based on address | 451 | // Look up shader in the cache based on address |
| 452 | const auto host_ptr{memory_manager.GetPointer(address)}; | 452 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; |
| 453 | Shader shader{TryGet(host_ptr)}; | 453 | Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; |
| 454 | if (shader) { | 454 | if (shader) { |
| 455 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 455 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 456 | } | 456 | } |
| 457 | 457 | ||
| 458 | const auto host_ptr{memory_manager.GetPointer(address)}; | ||
| 459 | |||
| 458 | // No shader found - create a new one | 460 | // No shader found - create a new one |
| 459 | ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; | 461 | ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; |
| 460 | ProgramCode code_b; | 462 | ProgramCode code_b; |
| @@ -465,9 +467,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 465 | 467 | ||
| 466 | const auto unique_identifier = GetUniqueIdentifier( | 468 | const auto unique_identifier = GetUniqueIdentifier( |
| 467 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | 469 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); |
| 468 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | 470 | |
| 469 | const ShaderParameters params{system, disk_cache, device, | 471 | const ShaderParameters params{system, disk_cache, device, |
| 470 | cpu_addr, host_ptr, unique_identifier}; | 472 | *cpu_addr, host_ptr, unique_identifier}; |
| 471 | 473 | ||
| 472 | const auto found = runtime_cache.find(unique_identifier); | 474 | const auto found = runtime_cache.find(unique_identifier); |
| 473 | if (found == runtime_cache.end()) { | 475 | if (found == runtime_cache.end()) { |
| @@ -484,18 +486,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 484 | 486 | ||
| 485 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | 487 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { |
| 486 | auto& memory_manager{system.GPU().MemoryManager()}; | 488 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 487 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | 489 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; |
| 488 | auto kernel = TryGet(host_ptr); | 490 | |
| 491 | auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||
| 489 | if (kernel) { | 492 | if (kernel) { |
| 490 | return kernel; | 493 | return kernel; |
| 491 | } | 494 | } |
| 492 | 495 | ||
| 496 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||
| 493 | // No kernel found, create a new one | 497 | // No kernel found, create a new one |
| 494 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 498 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 495 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; | 499 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; |
| 496 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 500 | |
| 497 | const ShaderParameters params{system, disk_cache, device, | 501 | const ShaderParameters params{system, disk_cache, device, |
| 498 | cpu_addr, host_ptr, unique_identifier}; | 502 | *cpu_addr, host_ptr, unique_identifier}; |
| 499 | 503 | ||
| 500 | const auto found = runtime_cache.find(unique_identifier); | 504 | const auto found = runtime_cache.find(unique_identifier); |
| 501 | if (found == runtime_cache.end()) { | 505 | if (found == runtime_cache.end()) { |
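Note: the two shader-cache hunks above share one pattern — the cache is now keyed by guest CPU address, and since the GPU-to-CPU translation can fail for unmapped ranges, the probe is guarded before any host pointer is fetched. A minimal sketch of that lookup, using simplified stand-in types rather than the real yuzu interfaces:

    // Sketch of the cpu_addr-keyed cache probe used by GetStageProgram and
    // GetComputeKernel above. Types are simplified stand-ins.
    #include <cstdint>
    #include <memory>
    #include <optional>
    #include <unordered_map>

    using VAddr = std::uint64_t;
    using GPUVAddr = std::uint64_t;

    struct Shader {};
    using ShaderPtr = std::shared_ptr<Shader>;

    std::unordered_map<VAddr, ShaderPtr> cache;

    // Stand-in for MemoryManager::GpuToCpuAddress; fails on unmapped ranges.
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) {
        return static_cast<VAddr>(addr); // pretend the range is identity-mapped
    }

    ShaderPtr TryGet(VAddr cpu_addr) {
        const auto it = cache.find(cpu_addr);
        return it != cache.end() ? it->second : nullptr;
    }

    ShaderPtr Lookup(GPUVAddr gpu_addr) {
        // The translation is optional, so the probe is guarded the same way
        // the diff guards it.
        const auto cpu_addr = GpuToCpuAddress(gpu_addr);
        return cpu_addr ? TryGet(*cpu_addr) : nullptr;
    }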
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 4935019fc..c836df5bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -65,11 +65,6 @@ public: | |||
| 65 | /// Gets the GL program handle for the shader | 65 | /// Gets the GL program handle for the shader |
| 66 | GLuint GetHandle() const; | 66 | GLuint GetHandle() const; |
| 67 | 67 | ||
| 68 | /// Returns the guest CPU address of the shader | ||
| 69 | VAddr GetCpuAddr() const override { | ||
| 70 | return cpu_addr; | ||
| 71 | } | ||
| 72 | |||
| 73 | /// Returns the size in bytes of the shader | 68 | /// Returns the size in bytes of the shader |
| 74 | std::size_t GetSizeInBytes() const override { | 69 | std::size_t GetSizeInBytes() const override { |
| 75 | return size_in_bytes; | 70 | return size_in_bytes; |
| @@ -90,13 +85,12 @@ public: | |||
| 90 | std::size_t size_in_bytes); | 85 | std::size_t size_in_bytes); |
| 91 | 86 | ||
| 92 | private: | 87 | private: |
| 93 | explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, | 88 | explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, |
| 94 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 89 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 95 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | 90 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); |
| 96 | 91 | ||
| 97 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 92 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 98 | ShaderEntries entries; | 93 | ShaderEntries entries; |
| 99 | VAddr cpu_addr = 0; | ||
| 100 | std::size_t size_in_bytes = 0; | 94 | std::size_t size_in_bytes = 0; |
| 101 | std::shared_ptr<OGLProgram> program; | 95 | std::shared_ptr<OGLProgram> program; |
| 102 | }; | 96 | }; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a25280a47..b1804e9ea 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -31,11 +31,11 @@ namespace { | |||
| 31 | 31 | ||
| 32 | using Tegra::Engines::ShaderType; | 32 | using Tegra::Engines::ShaderType; |
| 33 | using Tegra::Shader::Attribute; | 33 | using Tegra::Shader::Attribute; |
| 34 | using Tegra::Shader::AttributeUse; | ||
| 35 | using Tegra::Shader::Header; | 34 | using Tegra::Shader::Header; |
| 36 | using Tegra::Shader::IpaInterpMode; | 35 | using Tegra::Shader::IpaInterpMode; |
| 37 | using Tegra::Shader::IpaMode; | 36 | using Tegra::Shader::IpaMode; |
| 38 | using Tegra::Shader::IpaSampleMode; | 37 | using Tegra::Shader::IpaSampleMode; |
| 38 | using Tegra::Shader::PixelImap; | ||
| 39 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using VideoCommon::Shader::BuildTransformFeedback; | 40 | using VideoCommon::Shader::BuildTransformFeedback; |
| 41 | using VideoCommon::Shader::Registry; | 41 | using VideoCommon::Shader::Registry; |
| @@ -702,20 +702,19 @@ private: | |||
| 702 | code.AddNewLine(); | 702 | code.AddNewLine(); |
| 703 | } | 703 | } |
| 704 | 704 | ||
| 705 | std::string GetInputFlags(AttributeUse attribute) { | 705 | const char* GetInputFlags(PixelImap attribute) { |
| 706 | switch (attribute) { | 706 | switch (attribute) { |
| 707 | case AttributeUse::Perspective: | 707 | case PixelImap::Perspective: |
| 708 | // Default, Smooth | 708 | return "smooth"; |
| 709 | return {}; | 709 | case PixelImap::Constant: |
| 710 | case AttributeUse::Constant: | 710 | return "flat"; |
| 711 | return "flat "; | 711 | case PixelImap::ScreenLinear: |
| 712 | case AttributeUse::ScreenLinear: | 712 | return "noperspective"; |
| 713 | return "noperspective "; | 713 | case PixelImap::Unused: |
| 714 | default: | 714 | break; |
| 715 | case AttributeUse::Unused: | ||
| 716 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); | ||
| 717 | return {}; | ||
| 718 | } | 715 | } |
| 716 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); | ||
| 717 | return {}; | ||
| 719 | } | 718 | } |
| 720 | 719 | ||
| 721 | void DeclareInputAttributes() { | 720 | void DeclareInputAttributes() { |
| @@ -749,8 +748,8 @@ private: | |||
| 749 | 748 | ||
| 750 | std::string suffix; | 749 | std::string suffix; |
| 751 | if (stage == ShaderType::Fragment) { | 750 | if (stage == ShaderType::Fragment) { |
| 752 | const auto input_mode{header.ps.GetAttributeUse(location)}; | 751 | const auto input_mode{header.ps.GetPixelImap(location)}; |
| 753 | if (skip_unused && input_mode == AttributeUse::Unused) { | 752 | if (input_mode == PixelImap::Unused) { |
| 754 | return; | 753 | return; |
| 755 | } | 754 | } |
| 756 | suffix = GetInputFlags(input_mode); | 755 | suffix = GetInputFlags(input_mode); |
| @@ -927,7 +926,7 @@ private: | |||
| 927 | const u32 address{generic_base + index * generic_stride + element * element_stride}; | 926 | const u32 address{generic_base + index * generic_stride + element * element_stride}; |
| 928 | 927 | ||
| 929 | const bool declared = stage != ShaderType::Fragment || | 928 | const bool declared = stage != ShaderType::Fragment || |
| 930 | header.ps.GetAttributeUse(index) != AttributeUse::Unused; | 929 | header.ps.GetPixelImap(index) != PixelImap::Unused; |
| 931 | const std::string value = | 930 | const std::string value = |
| 932 | declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; | 931 | declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; |
| 933 | code.AddLine("case 0x{:X}U: return {};", address, value); | 932 | code.AddLine("case 0x{:X}U: return {};", address, value); |
| @@ -1142,8 +1141,7 @@ private: | |||
| 1142 | GetSwizzle(element)), | 1141 | GetSwizzle(element)), |
| 1143 | Type::Float}; | 1142 | Type::Float}; |
| 1144 | case ShaderType::Fragment: | 1143 | case ShaderType::Fragment: |
| 1145 | return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), | 1144 | return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; |
| 1146 | Type::Float}; | ||
| 1147 | default: | 1145 | default: |
| 1148 | UNREACHABLE(); | 1146 | UNREACHABLE(); |
| 1149 | } | 1147 | } |
| @@ -1821,15 +1819,17 @@ private: | |||
| 1821 | } | 1819 | } |
| 1822 | 1820 | ||
| 1823 | Expression HMergeH0(Operation operation) { | 1821 | Expression HMergeH0(Operation operation) { |
| 1824 | std::string dest = VisitOperand(operation, 0).AsUint(); | 1822 | const std::string dest = VisitOperand(operation, 0).AsUint(); |
| 1825 | std::string src = VisitOperand(operation, 1).AsUint(); | 1823 | const std::string src = VisitOperand(operation, 1).AsUint(); |
| 1826 | return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint}; | 1824 | return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", src, dest), |
| 1825 | Type::HalfFloat}; | ||
| 1827 | } | 1826 | } |
| 1828 | 1827 | ||
| 1829 | Expression HMergeH1(Operation operation) { | 1828 | Expression HMergeH1(Operation operation) { |
| 1830 | std::string dest = VisitOperand(operation, 0).AsUint(); | 1829 | const std::string dest = VisitOperand(operation, 0).AsUint(); |
| 1831 | std::string src = VisitOperand(operation, 1).AsUint(); | 1830 | const std::string src = VisitOperand(operation, 1).AsUint(); |
| 1832 | return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint}; | 1831 | return {fmt::format("vec2(unpackHalf2x16({}).x, unpackHalf2x16({}).y)", dest, src), |
| 1832 | Type::HalfFloat}; | ||
| 1833 | } | 1833 | } |
| 1834 | 1834 | ||
| 1835 | Expression HPack2(Operation operation) { | 1835 | Expression HPack2(Operation operation) { |
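Note: HMergeH0/HMergeH1 treat each 32-bit register as a pair of fp16 halves and replace exactly one half; the rewrite keeps that bit-level meaning but stays in the half-float domain via unpackHalf2x16 so the result carries Type::HalfFloat. A scalar C++ model of the masking semantics, for reference:

    // Scalar model of the HMergeH0/HMergeH1 bit semantics: each 32-bit
    // register holds two fp16 halves, and a merge replaces exactly one.
    #include <cstdint>
    #include <cstdio>

    // Result takes H0 (low 16 bits) from src and H1 from dest.
    std::uint32_t HMergeH0(std::uint32_t dest, std::uint32_t src) {
        return (src & 0x0000FFFFu) | (dest & 0xFFFF0000u);
    }

    // Result takes H1 (high 16 bits) from src and H0 from dest.
    std::uint32_t HMergeH1(std::uint32_t dest, std::uint32_t src) {
        return (dest & 0x0000FFFFu) | (src & 0xFFFF0000u);
    }

    int main() {
        const std::uint32_t dest = 0xAAAABBBBu;
        const std::uint32_t src = 0xCCCCDDDDu;
        std::printf("H0: %08X\n", static_cast<unsigned>(HMergeH0(dest, src))); // AAAADDDD
        std::printf("H1: %08X\n", static_cast<unsigned>(HMergeH1(dest, src))); // CCCCBBBB
    }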
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 255ac3147..d24fad3de 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp | |||
| @@ -185,6 +185,12 @@ void SetupDirtyPointSize(Tables& tables) { | |||
| 185 | tables[0][OFF(point_sprite_enable)] = PointSize; | 185 | tables[0][OFF(point_sprite_enable)] = PointSize; |
| 186 | } | 186 | } |
| 187 | 187 | ||
| 188 | void SetupDirtyLineWidth(Tables& tables) { | ||
| 189 | tables[0][OFF(line_width_smooth)] = LineWidth; | ||
| 190 | tables[0][OFF(line_width_aliased)] = LineWidth; | ||
| 191 | tables[0][OFF(line_smooth_enable)] = LineWidth; | ||
| 192 | } | ||
| 193 | |||
| 188 | void SetupDirtyClipControl(Tables& tables) { | 194 | void SetupDirtyClipControl(Tables& tables) { |
| 189 | auto& table = tables[0]; | 195 | auto& table = tables[0]; |
| 190 | table[OFF(screen_y_control)] = ClipControl; | 196 | table[OFF(screen_y_control)] = ClipControl; |
| @@ -233,6 +239,7 @@ void StateTracker::Initialize() { | |||
| 233 | SetupDirtyLogicOp(tables); | 239 | SetupDirtyLogicOp(tables); |
| 234 | SetupDirtyFragmentClampColor(tables); | 240 | SetupDirtyFragmentClampColor(tables); |
| 235 | SetupDirtyPointSize(tables); | 241 | SetupDirtyPointSize(tables); |
| 242 | SetupDirtyLineWidth(tables); | ||
| 236 | SetupDirtyClipControl(tables); | 243 | SetupDirtyClipControl(tables); |
| 237 | SetupDirtyDepthClampEnabled(tables); | 244 | SetupDirtyDepthClampEnabled(tables); |
| 238 | SetupDirtyMisc(tables); | 245 | SetupDirtyMisc(tables); |
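Note: SetupDirtyLineWidth follows the existing convention of the tracker tables — several Maxwell register offsets funnel into one dirty bit. A reduced model of that mechanism (the offsets below are hypothetical, not the real Maxwell3D register map):

    // Reduced model of the dirty-flag tables: a register write marks at most
    // one flag, and slot 0 acts as a sink for untracked registers.
    #include <array>
    #include <bitset>
    #include <cstddef>

    enum : std::size_t { NoDirty, LineWidth, NumFlags };

    std::array<std::size_t, 4096> table{}; // register offset -> dirty flag
    std::bitset<NumFlags> flags;

    void SetupDirtyLineWidth() {
        table[0x100] = LineWidth; // hypothetical line_width_smooth offset
        table[0x101] = LineWidth; // hypothetical line_width_aliased offset
        table[0x102] = LineWidth; // hypothetical line_smooth_enable offset
    }

    void OnRegisterWrite(std::size_t offset) {
        flags.set(table[offset]); // NoDirty hits are simply never consumed
    }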
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index b882d75c3..0f823288e 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h | |||
| @@ -78,6 +78,7 @@ enum : u8 { | |||
| 78 | LogicOp, | 78 | LogicOp, |
| 79 | FragmentClampColor, | 79 | FragmentClampColor, |
| 80 | PointSize, | 80 | PointSize, |
| 81 | LineWidth, | ||
| 81 | ClipControl, | 82 | ClipControl, |
| 82 | DepthClampEnabled, | 83 | DepthClampEnabled, |
| 83 | 84 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index f424e3000..2729d1265 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -24,7 +24,6 @@ using Tegra::Texture::SwizzleSource; | |||
| 24 | using VideoCore::MortonSwizzleMode; | 24 | using VideoCore::MortonSwizzleMode; |
| 25 | 25 | ||
| 26 | using VideoCore::Surface::PixelFormat; | 26 | using VideoCore::Surface::PixelFormat; |
| 27 | using VideoCore::Surface::SurfaceCompression; | ||
| 28 | using VideoCore::Surface::SurfaceTarget; | 27 | using VideoCore::Surface::SurfaceTarget; |
| 29 | using VideoCore::Surface::SurfaceType; | 28 | using VideoCore::Surface::SurfaceType; |
| 30 | 29 | ||
| @@ -37,102 +36,100 @@ namespace { | |||
| 37 | 36 | ||
| 38 | struct FormatTuple { | 37 | struct FormatTuple { |
| 39 | GLint internal_format; | 38 | GLint internal_format; |
| 40 | GLenum format; | 39 | GLenum format = GL_NONE; |
| 41 | GLenum type; | 40 | GLenum type = GL_NONE; |
| 42 | bool compressed; | ||
| 43 | }; | 41 | }; |
| 44 | 42 | ||
| 45 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | 43 | constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ |
| 46 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U | 44 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U |
| 47 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S | 45 | {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S |
| 48 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI | 46 | {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI |
| 49 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U | 47 | {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U |
| 50 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U | 48 | {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U |
| 51 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U | 49 | {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U |
| 52 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U | 50 | {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U |
| 53 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI | 51 | {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI |
| 54 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F | 52 | {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F |
| 55 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U | 53 | {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U |
| 56 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S | 54 | {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S |
| 57 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI | 55 | {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI |
| 58 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F | 56 | {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F |
| 59 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI | 57 | {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI |
| 60 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 | 58 | {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1 |
| 61 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 | 59 | {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23 |
| 62 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 | 60 | {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45 |
| 63 | {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 | 61 | {GL_COMPRESSED_RED_RGTC1}, // DXN1 |
| 64 | {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM | 62 | {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM |
| 65 | {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM | 63 | {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM |
| 66 | {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U | 64 | {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U |
| 67 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16 | 65 | {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16 |
| 68 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16 | 66 | {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16 |
| 69 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4 | 67 | {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4 |
| 70 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 | 68 | {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 |
| 71 | {GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F | 69 | {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F |
| 72 | {GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F | 70 | {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F |
| 73 | {GL_R32F, GL_RED, GL_FLOAT, false}, // R32F | 71 | {GL_R32F, GL_RED, GL_FLOAT}, // R32F |
| 74 | {GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F | 72 | {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F |
| 75 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U | 73 | {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U |
| 76 | {GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S | 74 | {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S |
| 77 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI | 75 | {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI |
| 78 | {GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I | 76 | {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I |
| 79 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16 | 77 | {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16 |
| 80 | {GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F | 78 | {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F |
| 81 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI | 79 | {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI |
| 82 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I | 80 | {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I |
| 83 | {GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S | 81 | {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S |
| 84 | {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F | 82 | {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F |
| 85 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB | 83 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB |
| 86 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U | 84 | {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U |
| 87 | {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S | 85 | {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S |
| 88 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI | 86 | {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI |
| 89 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F | 87 | {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F |
| 90 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI | 88 | {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI |
| 91 | {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I | 89 | {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I |
| 92 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 | 90 | {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8 |
| 93 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 | 91 | {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5 |
| 94 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 | 92 | {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4 |
| 95 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 | 93 | {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 |
| 96 | // Compressed sRGB formats | 94 | // Compressed sRGB formats |
| 97 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB | 95 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB |
| 98 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB | 96 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB |
| 99 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB | 97 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB |
| 100 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB | 98 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB |
| 101 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U | 99 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U |
| 102 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB | 100 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB |
| 103 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB | 101 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB |
| 104 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB | 102 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB |
| 105 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB | 103 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB |
| 106 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5 | 104 | {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5 |
| 107 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB | 105 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB |
| 108 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8 | 106 | {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8 |
| 109 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB | 107 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB |
| 110 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6 | 108 | {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6 |
| 111 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB | 109 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB |
| 112 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10 | 110 | {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10 |
| 113 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB | 111 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB |
| 114 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12 | 112 | {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12 |
| 115 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB | 113 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB |
| 116 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6 | 114 | {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6 |
| 117 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB | 115 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB |
| 118 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5 | 116 | {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5 |
| 119 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB | 117 | {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB |
| 120 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F | 118 | {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F |
| 121 | 119 | ||
| 122 | // Depth formats | 120 | // Depth formats |
| 123 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F | 121 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F |
| 124 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16 | 122 | {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16 |
| 125 | 123 | ||
| 126 | // DepthStencil formats | 124 | // DepthStencil formats |
| 127 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8 | 125 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8 |
| 128 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24 | 126 | {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24 |
| 129 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8 | 127 | {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8 |
| 130 | }}; | 128 | }}; |
| 131 | 129 | ||
| 132 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | 130 | const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { |
| 133 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | 131 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); |
| 134 | const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; | 132 | return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; |
| 135 | return format; | ||
| 136 | } | 133 | } |
| 137 | 134 | ||
| 138 | GLenum GetTextureTarget(const SurfaceTarget& target) { | 135 | GLenum GetTextureTarget(const SurfaceTarget& target) { |
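Note: the rewritten tuple table relies on default member initializers, so compressed entries can list only the sized internal format, and "is this compressed?" is derived from the surface parameters instead of being stored per entry. A reduced sketch of the same idiom, with a two-entry table and the real GL enum values declared locally so it compiles standalone:

    // Reduced sketch of the FormatTuple rewrite: default member initializers
    // let compressed entries specify only the internal format.
    #include <array>

    using GLint = int;
    using GLenum = unsigned int;

    constexpr GLenum GL_NONE = 0;
    constexpr GLint GL_RGBA8 = 0x8058;
    constexpr GLenum GL_RGBA = 0x1908;
    constexpr GLenum GL_UNSIGNED_BYTE = 0x1401;
    constexpr GLint GL_COMPRESSED_RGBA_S3TC_DXT1_EXT = 0x83F1;

    struct FormatTuple {
        GLint internal_format;
        GLenum format = GL_NONE; // defaulted: compressed entries omit these
        GLenum type = GL_NONE;
    };

    constexpr std::array<FormatTuple, 2> tuples{{
        {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // uncompressed: all three fields
        {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},    // compressed: defaults fill the rest
    }};

    static_assert(tuples[1].format == GL_NONE && tuples[1].type == GL_NONE, "");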
| @@ -242,13 +239,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte | |||
| 242 | 239 | ||
| 243 | } // Anonymous namespace | 240 | } // Anonymous namespace |
| 244 | 241 | ||
| 245 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) | 242 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, |
| 246 | : VideoCommon::SurfaceBase<View>(gpu_addr, params) { | 243 | bool is_astc_supported) |
| 247 | const auto& tuple{GetFormatTuple(params.pixel_format)}; | 244 | : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) { |
| 248 | internal_format = tuple.internal_format; | 245 | if (is_converted) { |
| 249 | format = tuple.format; | 246 | internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8; |
| 250 | type = tuple.type; | 247 | format = GL_RGBA; |
| 251 | is_compressed = tuple.compressed; | 248 | type = GL_UNSIGNED_BYTE; |
| 249 | } else { | ||
| 250 | const auto& tuple{GetFormatTuple(params.pixel_format)}; | ||
| 251 | internal_format = tuple.internal_format; | ||
| 252 | format = tuple.format; | ||
| 253 | type = tuple.type; | ||
| 254 | is_compressed = params.IsCompressed(); | ||
| 255 | } | ||
| 252 | target = GetTextureTarget(params.target); | 256 | target = GetTextureTarget(params.target); |
| 253 | texture = CreateTexture(params, target, internal_format, texture_buffer); | 257 | texture = CreateTexture(params, target, internal_format, texture_buffer); |
| 254 | DecorateSurfaceName(); | 258 | DecorateSurfaceName(); |
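Note: the new constructor branch picks the GL format triple from two sources — converted surfaces (e.g. ASTC decoded on the CPU when the host GPU lacks support) always become plain RGBA8, sRGB-tagged when the guest format was an sRGB variant, while native surfaces keep using the tuple table. A small sketch of that selection, assuming glad for the GL enums:

    // Sketch of the converted-format branch above: surfaces the host GPU
    // cannot sample natively are decoded to plain RGBA8.
    #include <glad/glad.h>

    struct GLFormat {
        GLint internal_format;
        GLenum format;
        GLenum type;
    };

    GLFormat PickFormat(bool is_converted, bool srgb_conversion,
                        const GLFormat& native_tuple) {
        if (is_converted) {
            return {srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8, GL_RGBA,
                    GL_UNSIGNED_BYTE};
        }
        return native_tuple; // native path: keep the tuple-table entry
    }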
| @@ -264,7 +268,7 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | |||
| 264 | 268 | ||
| 265 | if (params.IsBuffer()) { | 269 | if (params.IsBuffer()) { |
| 266 | glGetNamedBufferSubData(texture_buffer.handle, 0, | 270 | glGetNamedBufferSubData(texture_buffer.handle, 0, |
| 267 | static_cast<GLsizeiptr>(params.GetHostSizeInBytes()), | 271 | static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), |
| 268 | staging_buffer.data()); | 272 | staging_buffer.data()); |
| 269 | return; | 273 | return; |
| 270 | } | 274 | } |
| @@ -272,9 +276,10 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | |||
| 272 | SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); | 276 | SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); |
| 273 | 277 | ||
| 274 | for (u32 level = 0; level < params.emulated_levels; ++level) { | 278 | for (u32 level = 0; level < params.emulated_levels; ++level) { |
| 275 | glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); | 279 | glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); |
| 276 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | 280 | glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); |
| 277 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); | 281 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); |
| 282 | |||
| 278 | u8* const mip_data = staging_buffer.data() + mip_offset; | 283 | u8* const mip_data = staging_buffer.data() + mip_offset; |
| 279 | const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); | 284 | const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); |
| 280 | if (is_compressed) { | 285 | if (is_compressed) { |
| @@ -294,14 +299,10 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { | |||
| 294 | } | 299 | } |
| 295 | 300 | ||
| 296 | void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { | 301 | void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { |
| 297 | glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); | 302 | glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); |
| 298 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); | 303 | glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); |
| 299 | 304 | ||
| 300 | auto compression_type = params.GetCompressionType(); | 305 | const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); |
| 301 | |||
| 302 | const std::size_t mip_offset = compression_type == SurfaceCompression::Converted | ||
| 303 | ? params.GetConvertedMipmapOffset(level) | ||
| 304 | : params.GetHostMipmapLevelOffset(level); | ||
| 305 | const u8* buffer{staging_buffer.data() + mip_offset}; | 306 | const u8* buffer{staging_buffer.data() + mip_offset}; |
| 306 | if (is_compressed) { | 307 | if (is_compressed) { |
| 307 | const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; | 308 | const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; |
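Note: DownloadTexture and UploadTextureMipmap set GL_PACK/UNPACK_ALIGNMENT and ROW_LENGTH per mip so the packed rows match the staging-buffer layout. A sketch of that bookkeeping for a single uncompressed RGBA8 mip, assuming a GL 4.5 context loaded through glad (the real code also handles compressed formats and buffer-backed surfaces):

    // Sketch of the per-mip pixel-store bookkeeping: the pack alignment and
    // row length must match the staging buffer or GL pads/truncates rows.
    #include <glad/glad.h>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    void DownloadMip(GLuint texture, GLint level, std::uint32_t width,
                     std::uint32_t height, std::uint32_t bytes_per_pixel,
                     std::vector<std::uint8_t>& out) {
        const std::uint32_t row_bytes = width * bytes_per_pixel;
        std::uint32_t align = 8; // GL accepts 1, 2, 4 or 8
        while (row_bytes % align != 0) {
            align /= 2; // largest alignment that divides the row stride
        }
        glPixelStorei(GL_PACK_ALIGNMENT, static_cast<GLint>(align));
        glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(width));
        out.resize(static_cast<std::size_t>(row_bytes) * height);
        glGetTextureImage(texture, level, GL_RGBA, GL_UNSIGNED_BYTE,
                          static_cast<GLsizei>(out.size()), out.data());
        glPixelStorei(GL_PACK_ROW_LENGTH, 0); // restore defaults
        glPixelStorei(GL_PACK_ALIGNMENT, 4);
    }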
| @@ -410,14 +411,13 @@ CachedSurfaceView::~CachedSurfaceView() = default; | |||
| 410 | void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | 411 | void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { |
| 411 | ASSERT(params.num_levels == 1); | 412 | ASSERT(params.num_levels == 1); |
| 412 | 413 | ||
| 413 | const GLuint texture = surface.GetTexture(); | ||
| 414 | if (params.num_layers > 1) { | 414 | if (params.num_layers > 1) { |
| 415 | // Layered framebuffer attachments | 415 | // Layered framebuffer attachments |
| 416 | UNIMPLEMENTED_IF(params.base_layer != 0); | 416 | UNIMPLEMENTED_IF(params.base_layer != 0); |
| 417 | 417 | ||
| 418 | switch (params.target) { | 418 | switch (params.target) { |
| 419 | case SurfaceTarget::Texture2DArray: | 419 | case SurfaceTarget::Texture2DArray: |
| 420 | glFramebufferTexture(target, attachment, texture, params.base_level); | 420 | glFramebufferTexture(target, attachment, GetTexture(), 0); |
| 421 | break; | 421 | break; |
| 422 | default: | 422 | default: |
| 423 | UNIMPLEMENTED(); | 423 | UNIMPLEMENTED(); |
| @@ -426,6 +426,7 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | |||
| 426 | } | 426 | } |
| 427 | 427 | ||
| 428 | const GLenum view_target = surface.GetTarget(); | 428 | const GLenum view_target = surface.GetTarget(); |
| 429 | const GLuint texture = surface.GetTexture(); | ||
| 429 | switch (surface.GetSurfaceParams().target) { | 430 | switch (surface.GetSurfaceParams().target) { |
| 430 | case SurfaceTarget::Texture1D: | 431 | case SurfaceTarget::Texture1D: |
| 431 | glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); | 432 | glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); |
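Note: the Attach fix fetches the texture handle only after the layered early-out and attaches the view's level 0 when the whole array is bound. A simplified sketch of the layered vs. single-view split, again assuming glad (the real code also covers 1D/3D targets):

    // Layered vs. 2D attach: a multi-layer view binds the whole array with
    // glFramebufferTexture so shaders can route with gl_Layer.
    #include <glad/glad.h>

    void Attach(GLenum target, GLenum attachment, GLuint texture, GLint level,
                GLsizei num_layers) {
        if (num_layers > 1) {
            glFramebufferTexture(target, attachment, texture, level);
            return;
        }
        glFramebufferTexture2D(target, attachment, GL_TEXTURE_2D, texture, level);
    }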
| @@ -482,7 +483,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { | |||
| 482 | TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, | 483 | TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, |
| 483 | VideoCore::RasterizerInterface& rasterizer, | 484 | VideoCore::RasterizerInterface& rasterizer, |
| 484 | const Device& device, StateTracker& state_tracker) | 485 | const Device& device, StateTracker& state_tracker) |
| 485 | : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} { | 486 | : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} { |
| 486 | src_framebuffer.Create(); | 487 | src_framebuffer.Create(); |
| 487 | dst_framebuffer.Create(); | 488 | dst_framebuffer.Create(); |
| 488 | } | 489 | } |
| @@ -490,7 +491,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, | |||
| 490 | TextureCacheOpenGL::~TextureCacheOpenGL() = default; | 491 | TextureCacheOpenGL::~TextureCacheOpenGL() = default; |
| 491 | 492 | ||
| 492 | Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { | 493 | Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { |
| 493 | return std::make_shared<CachedSurface>(gpu_addr, params); | 494 | return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported); |
| 494 | } | 495 | } |
| 495 | 496 | ||
| 496 | void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, | 497 | void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, |
| @@ -596,7 +597,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) | |||
| 596 | 597 | ||
| 597 | glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); | 598 | glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); |
| 598 | 599 | ||
| 599 | if (source_format.compressed) { | 600 | if (src_surface->IsCompressed()) { |
| 600 | glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), | 601 | glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), |
| 601 | nullptr); | 602 | nullptr); |
| 602 | } else { | 603 | } else { |
| @@ -610,7 +611,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) | |||
| 610 | const GLsizei width = static_cast<GLsizei>(dst_params.width); | 611 | const GLsizei width = static_cast<GLsizei>(dst_params.width); |
| 611 | const GLsizei height = static_cast<GLsizei>(dst_params.height); | 612 | const GLsizei height = static_cast<GLsizei>(dst_params.height); |
| 612 | const GLsizei depth = static_cast<GLsizei>(dst_params.depth); | 613 | const GLsizei depth = static_cast<GLsizei>(dst_params.depth); |
| 613 | if (dest_format.compressed) { | 614 | if (dst_surface->IsCompressed()) { |
| 614 | LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); | 615 | LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); |
| 615 | UNREACHABLE(); | 616 | UNREACHABLE(); |
| 616 | } else { | 617 | } else { |
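Note: BufferCopy round-trips pixels through a pixel buffer object so the copy never touches the CPU — pack from the source texture into the PBO, then unpack from the PBO into the destination. A sketch of that round trip for an uncompressed RGBA8 copy, assuming GL 4.5 DSA entry points via glad:

    // PBO round trip: glGetTextureImage writes into the bound pack buffer,
    // glTextureSubImage2D reads from the bound unpack buffer.
    #include <glad/glad.h>

    void CopyThroughPbo(GLuint pbo, GLuint src_tex, GLuint dst_tex,
                        GLsizei width, GLsizei height, GLsizeiptr size) {
        glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
        glGetTextureImage(src_tex, 0, GL_RGBA, GL_UNSIGNED_BYTE,
                          static_cast<GLsizei>(size), nullptr); // writes to PBO
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
        glTextureSubImage2D(dst_tex, 0, 0, 0, width, height, GL_RGBA,
                            GL_UNSIGNED_BYTE, nullptr); // reads from PBO
        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    }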
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 6658c6ffd..02d9981a1 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -37,7 +37,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> { | |||
| 37 | friend CachedSurfaceView; | 37 | friend CachedSurfaceView; |
| 38 | 38 | ||
| 39 | public: | 39 | public: |
| 40 | explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); | 40 | explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported); |
| 41 | ~CachedSurface(); | 41 | ~CachedSurface(); |
| 42 | 42 | ||
| 43 | void UploadTexture(const std::vector<u8>& staging_buffer) override; | 43 | void UploadTexture(const std::vector<u8>& staging_buffer) override; |
| @@ -51,6 +51,10 @@ public: | |||
| 51 | return texture.handle; | 51 | return texture.handle; |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | bool IsCompressed() const { | ||
| 55 | return is_compressed; | ||
| 56 | } | ||
| 57 | |||
| 54 | protected: | 58 | protected: |
| 55 | void DecorateSurfaceName() override; | 59 | void DecorateSurfaceName() override; |
| 56 | 60 | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index f1a28cc21..b2a179746 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -315,8 +315,8 @@ public: | |||
| 315 | 315 | ||
| 316 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, | 316 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, |
| 317 | Core::Frontend::GraphicsContext& context) | 317 | Core::Frontend::GraphicsContext& context) |
| 318 | : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, | 318 | : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, |
| 319 | frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {} | 319 | has_debug_tool{HasDebugTool()} {} |
| 320 | 320 | ||
| 321 | RendererOpenGL::~RendererOpenGL() = default; | 321 | RendererOpenGL::~RendererOpenGL() = default; |
| 322 | 322 | ||
diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h deleted file mode 100644 index 323bf6b39..000000000 --- a/src/video_core/renderer_vulkan/declarations.h +++ /dev/null | |||
| @@ -1,58 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | namespace vk { | ||
| 8 | class DispatchLoaderDynamic; | ||
| 9 | } | ||
| 10 | |||
| 11 | namespace Vulkan { | ||
| 12 | constexpr vk::DispatchLoaderDynamic* dont_use_me_dld = nullptr; | ||
| 13 | } | ||
| 14 | |||
| 15 | #define VULKAN_HPP_DEFAULT_DISPATCHER (*::Vulkan::dont_use_me_dld) | ||
| 16 | #define VULKAN_HPP_ENABLE_DYNAMIC_LOADER_TOOL 0 | ||
| 17 | #define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 | ||
| 18 | #include <vulkan/vulkan.hpp> | ||
| 19 | |||
| 20 | namespace Vulkan { | ||
| 21 | |||
| 22 | // vulkan.hpp unique handlers use DispatchLoaderStatic | ||
| 23 | template <typename T> | ||
| 24 | using UniqueHandle = vk::UniqueHandle<T, vk::DispatchLoaderDynamic>; | ||
| 25 | |||
| 26 | using UniqueAccelerationStructureNV = UniqueHandle<vk::AccelerationStructureNV>; | ||
| 27 | using UniqueBuffer = UniqueHandle<vk::Buffer>; | ||
| 28 | using UniqueBufferView = UniqueHandle<vk::BufferView>; | ||
| 29 | using UniqueCommandBuffer = UniqueHandle<vk::CommandBuffer>; | ||
| 30 | using UniqueCommandPool = UniqueHandle<vk::CommandPool>; | ||
| 31 | using UniqueDescriptorPool = UniqueHandle<vk::DescriptorPool>; | ||
| 32 | using UniqueDescriptorSet = UniqueHandle<vk::DescriptorSet>; | ||
| 33 | using UniqueDescriptorSetLayout = UniqueHandle<vk::DescriptorSetLayout>; | ||
| 34 | using UniqueDescriptorUpdateTemplate = UniqueHandle<vk::DescriptorUpdateTemplate>; | ||
| 35 | using UniqueDevice = UniqueHandle<vk::Device>; | ||
| 36 | using UniqueDeviceMemory = UniqueHandle<vk::DeviceMemory>; | ||
| 37 | using UniqueEvent = UniqueHandle<vk::Event>; | ||
| 38 | using UniqueFence = UniqueHandle<vk::Fence>; | ||
| 39 | using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; | ||
| 40 | using UniqueImage = UniqueHandle<vk::Image>; | ||
| 41 | using UniqueImageView = UniqueHandle<vk::ImageView>; | ||
| 42 | using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; | ||
| 43 | using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; | ||
| 44 | using UniquePipeline = UniqueHandle<vk::Pipeline>; | ||
| 45 | using UniquePipelineCache = UniqueHandle<vk::PipelineCache>; | ||
| 46 | using UniquePipelineLayout = UniqueHandle<vk::PipelineLayout>; | ||
| 47 | using UniqueQueryPool = UniqueHandle<vk::QueryPool>; | ||
| 48 | using UniqueRenderPass = UniqueHandle<vk::RenderPass>; | ||
| 49 | using UniqueSampler = UniqueHandle<vk::Sampler>; | ||
| 50 | using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; | ||
| 51 | using UniqueSemaphore = UniqueHandle<vk::Semaphore>; | ||
| 52 | using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; | ||
| 53 | using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; | ||
| 54 | using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; | ||
| 55 | using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>; | ||
| 56 | using UniqueDebugUtilsMessengerEXT = UniqueHandle<vk::DebugUtilsMessengerEXT>; | ||
| 57 | |||
| 58 | } // namespace Vulkan | ||
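Note: this deleted header routed vulkan.hpp's default dispatcher through a macro; with it gone, dispatch is done by hand in the new wrapper (included below in maxwell_to_vk.cpp). A minimal sketch of manual Vulkan dispatch — resolve vkGetInstanceProcAddr from the driver library, then pull every other entry point through it. The Dispatch struct and two-stage loading are illustrative, not the wrapper's actual interface:

    // Manual Vulkan dispatch, the technique replacing the vulkan.hpp
    // dispatcher deleted above.
    #include <vulkan/vulkan.h>

    struct Dispatch {
        PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = nullptr;
        PFN_vkCreateInstance vkCreateInstance = nullptr;
        PFN_vkDestroyInstance vkDestroyInstance = nullptr;
    };

    // 'loader' would come from dlopen/LoadLibrary of the Vulkan runtime.
    bool LoadGlobalDispatch(PFN_vkGetInstanceProcAddr loader, Dispatch& dld) {
        dld.vkGetInstanceProcAddr = loader;
        // Global-level entry points are queried with a null instance.
        dld.vkCreateInstance = reinterpret_cast<PFN_vkCreateInstance>(
            loader(nullptr, "vkCreateInstance"));
        return dld.vkCreateInstance != nullptr;
    }

    // Instance-level entry points need the created instance handle.
    void LoadInstanceDispatch(VkInstance instance, Dispatch& dld) {
        dld.vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
            dld.vkGetInstanceProcAddr(instance, "vkDestroyInstance"));
    }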
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 7480cb7c3..8681b821f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -2,13 +2,15 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <iterator> | ||
| 6 | |||
| 5 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 6 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 7 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | 12 | #include "video_core/renderer_vulkan/vk_device.h" |
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 12 | #include "video_core/surface.h" | 14 | #include "video_core/surface.h" |
| 13 | 15 | ||
| 14 | namespace Vulkan::MaxwellToVK { | 16 | namespace Vulkan::MaxwellToVK { |
| @@ -17,88 +19,89 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 17 | 19 | ||
| 18 | namespace Sampler { | 20 | namespace Sampler { |
| 19 | 21 | ||
| 20 | vk::Filter Filter(Tegra::Texture::TextureFilter filter) { | 22 | VkFilter Filter(Tegra::Texture::TextureFilter filter) { |
| 21 | switch (filter) { | 23 | switch (filter) { |
| 22 | case Tegra::Texture::TextureFilter::Linear: | 24 | case Tegra::Texture::TextureFilter::Linear: |
| 23 | return vk::Filter::eLinear; | 25 | return VK_FILTER_LINEAR; |
| 24 | case Tegra::Texture::TextureFilter::Nearest: | 26 | case Tegra::Texture::TextureFilter::Nearest: |
| 25 | return vk::Filter::eNearest; | 27 | return VK_FILTER_NEAREST; |
| 26 | } | 28 | } |
| 27 | UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); | 29 | UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); |
| 28 | return {}; | 30 | return {}; |
| 29 | } | 31 | } |
| 30 | 32 | ||
| 31 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { | 33 | VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { |
| 32 | switch (mipmap_filter) { | 34 | switch (mipmap_filter) { |
| 33 | case Tegra::Texture::TextureMipmapFilter::None: | 35 | case Tegra::Texture::TextureMipmapFilter::None: |
| 34 | // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping | 36 | // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping |
| 35 | // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to | 37 | // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to |
| 36 | // use an image view with a single mipmap level to emulate this. | 38 | // use an image view with a single mipmap level to emulate this. |
| 37 | return vk::SamplerMipmapMode::eLinear; | 39 | return VK_SAMPLER_MIPMAP_MODE_LINEAR; |
| 38 | case Tegra::Texture::TextureMipmapFilter::Linear: | 41 | case Tegra::Texture::TextureMipmapFilter::Linear: |
| 39 | return vk::SamplerMipmapMode::eLinear; | 42 | return VK_SAMPLER_MIPMAP_MODE_LINEAR; |
| 40 | case Tegra::Texture::TextureMipmapFilter::Nearest: | 43 | case Tegra::Texture::TextureMipmapFilter::Nearest: |
| 41 | return vk::SamplerMipmapMode::eNearest; | 44 | return VK_SAMPLER_MIPMAP_MODE_NEAREST; |
| 42 | } | 45 | } |
| 43 | UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); | 46 | UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); |
| 44 | return {}; | 47 | return {}; |
| 45 | } | 48 | } |
| 46 | 49 | ||
| 47 | vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, | 50 | VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, |
| 48 | Tegra::Texture::TextureFilter filter) { | 51 | Tegra::Texture::TextureFilter filter) { |
| 49 | switch (wrap_mode) { | 52 | switch (wrap_mode) { |
| 50 | case Tegra::Texture::WrapMode::Wrap: | 53 | case Tegra::Texture::WrapMode::Wrap: |
| 51 | return vk::SamplerAddressMode::eRepeat; | 54 | return VK_SAMPLER_ADDRESS_MODE_REPEAT; |
| 52 | case Tegra::Texture::WrapMode::Mirror: | 55 | case Tegra::Texture::WrapMode::Mirror: |
| 53 | return vk::SamplerAddressMode::eMirroredRepeat; | 56 | return VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; |
| 54 | case Tegra::Texture::WrapMode::ClampToEdge: | 57 | case Tegra::Texture::WrapMode::ClampToEdge: |
| 55 | return vk::SamplerAddressMode::eClampToEdge; | 58 | return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; |
| 56 | case Tegra::Texture::WrapMode::Border: | 59 | case Tegra::Texture::WrapMode::Border: |
| 57 | return vk::SamplerAddressMode::eClampToBorder; | 60 | return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; |
| 58 | case Tegra::Texture::WrapMode::Clamp: | 61 | case Tegra::Texture::WrapMode::Clamp: |
| 59 | if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { | 62 | if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { |
| 60 | // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this | 63 | // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this |
| 61 | // by sending an invalid enumeration. | 64 | // by sending an invalid enumeration. |
| 62 | return static_cast<vk::SamplerAddressMode>(0xcafe); | 65 | return static_cast<VkSamplerAddressMode>(0xcafe); |
| 63 | } | 66 | } |
| 64 | // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors | 67 | // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors |
| 65 | switch (filter) { | 68 | switch (filter) { |
| 66 | case Tegra::Texture::TextureFilter::Nearest: | 69 | case Tegra::Texture::TextureFilter::Nearest: |
| 67 | return vk::SamplerAddressMode::eClampToEdge; | 70 | return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; |
| 68 | case Tegra::Texture::TextureFilter::Linear: | 71 | case Tegra::Texture::TextureFilter::Linear: |
| 69 | return vk::SamplerAddressMode::eClampToBorder; | 72 | return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; |
| 70 | } | 73 | } |
| 71 | UNREACHABLE(); | 74 | UNREACHABLE(); |
| 72 | return vk::SamplerAddressMode::eClampToEdge; | 75 | return VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; |
| 73 | case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: | 76 | case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: |
| 74 | return vk::SamplerAddressMode::eMirrorClampToEdge; | 77 | return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; |
| 75 | case Tegra::Texture::WrapMode::MirrorOnceBorder: | 78 | case Tegra::Texture::WrapMode::MirrorOnceBorder: |
| 76 | UNIMPLEMENTED(); | 79 | UNIMPLEMENTED(); |
| 77 | return vk::SamplerAddressMode::eMirrorClampToEdge; | 80 | return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; |
| 78 | default: | 81 | default: |
| 79 | UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); | 82 | UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); |
| 80 | return {}; | 83 | return {}; |
| 81 | } | 84 | } |
| 82 | } | 85 | } |
| 83 | 86 | ||
| 84 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { | 87 | VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { |
| 85 | switch (depth_compare_func) { | 88 | switch (depth_compare_func) { |
| 86 | case Tegra::Texture::DepthCompareFunc::Never: | 89 | case Tegra::Texture::DepthCompareFunc::Never: |
| 87 | return vk::CompareOp::eNever; | 90 | return VK_COMPARE_OP_NEVER; |
| 88 | case Tegra::Texture::DepthCompareFunc::Less: | 91 | case Tegra::Texture::DepthCompareFunc::Less: |
| 89 | return vk::CompareOp::eLess; | 92 | return VK_COMPARE_OP_LESS; |
| 90 | case Tegra::Texture::DepthCompareFunc::LessEqual: | 93 | case Tegra::Texture::DepthCompareFunc::LessEqual: |
| 91 | return vk::CompareOp::eLessOrEqual; | 94 | return VK_COMPARE_OP_LESS_OR_EQUAL; |
| 92 | case Tegra::Texture::DepthCompareFunc::Equal: | 95 | case Tegra::Texture::DepthCompareFunc::Equal: |
| 93 | return vk::CompareOp::eEqual; | 96 | return VK_COMPARE_OP_EQUAL; |
| 94 | case Tegra::Texture::DepthCompareFunc::NotEqual: | 97 | case Tegra::Texture::DepthCompareFunc::NotEqual: |
| 95 | return vk::CompareOp::eNotEqual; | 98 | return VK_COMPARE_OP_NOT_EQUAL; |
| 96 | case Tegra::Texture::DepthCompareFunc::Greater: | 99 | case Tegra::Texture::DepthCompareFunc::Greater: |
| 97 | return vk::CompareOp::eGreater; | 100 | return VK_COMPARE_OP_GREATER; |
| 98 | case Tegra::Texture::DepthCompareFunc::GreaterEqual: | 101 | case Tegra::Texture::DepthCompareFunc::GreaterEqual: |
| 99 | return vk::CompareOp::eGreaterOrEqual; | 102 | return VK_COMPARE_OP_GREATER_OR_EQUAL; |
| 100 | case Tegra::Texture::DepthCompareFunc::Always: | 103 | case Tegra::Texture::DepthCompareFunc::Always: |
| 101 | return vk::CompareOp::eAlways; | 104 | return VK_COMPARE_OP_ALWAYS; |
| 102 | } | 105 | } |
| 103 | UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", | 106 | UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", |
| 104 | static_cast<u32>(depth_compare_func)); | 107 | static_cast<u32>(depth_compare_func)); |
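Note: the Sampler helpers now return plain C enums (VkFilter, VkSamplerMipmapMode, VkCompareOp, ...) instead of vk::* wrappers, keeping the same switch-with-loud-fallback idiom. A standalone sketch of that translation pattern with a hypothetical guest-side enum:

    // Switch-with-loud-fallback translation, returning a plain C enum.
    // GuestFilter is a hypothetical stand-in for the Tegra-side enum.
    #include <vulkan/vulkan.h>
    #include <cstdio>

    enum class GuestFilter : unsigned { Nearest = 1, Linear = 2 };

    VkFilter Filter(GuestFilter filter) {
        switch (filter) {
        case GuestFilter::Linear:
            return VK_FILTER_LINEAR;
        case GuestFilter::Nearest:
            return VK_FILTER_NEAREST;
        }
        std::fprintf(stderr, "Unimplemented sampler filter=%u\n",
                     static_cast<unsigned>(filter));
        return VK_FILTER_NEAREST; // VkFilter{} would also be NEAREST (0)
    }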
| @@ -112,92 +115,92 @@ namespace { | |||
| 112 | enum : u32 { Attachable = 1, Storage = 2 }; | 115 | enum : u32 { Attachable = 1, Storage = 2 }; |
| 113 | 116 | ||
| 114 | struct FormatTuple { | 117 | struct FormatTuple { |
| 115 | vk::Format format; ///< Vulkan format | 118 | VkFormat format; ///< Vulkan format |
| 116 | int usage; ///< Describes image format usage | 119 | int usage = 0; ///< Describes image format usage |
| 117 | } constexpr tex_format_tuples[] = { | 120 | } constexpr tex_format_tuples[] = { |
| 118 | {vk::Format::eA8B8G8R8UnormPack32, Attachable | Storage}, // ABGR8U | 121 | {VK_FORMAT_A8B8G8R8_UNORM_PACK32, Attachable | Storage}, // ABGR8U |
| 119 | {vk::Format::eA8B8G8R8SnormPack32, Attachable | Storage}, // ABGR8S | 122 | {VK_FORMAT_A8B8G8R8_SNORM_PACK32, Attachable | Storage}, // ABGR8S |
| 120 | {vk::Format::eA8B8G8R8UintPack32, Attachable | Storage}, // ABGR8UI | 123 | {VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // ABGR8UI |
| 121 | {vk::Format::eB5G6R5UnormPack16, {}}, // B5G6R5U | 124 | {VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5U |
| 122 | {vk::Format::eA2B10G10R10UnormPack32, Attachable | Storage}, // A2B10G10R10U | 125 | {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10U |
| 123 | {vk::Format::eA1R5G5B5UnormPack16, Attachable}, // A1B5G5R5U (flipped with swizzle) | 126 | {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5U (flipped with swizzle) |
| 124 | {vk::Format::eR8Unorm, Attachable | Storage}, // R8U | 127 | {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8U |
| 125 | {vk::Format::eR8Uint, Attachable | Storage}, // R8UI | 128 | {VK_FORMAT_R8_UINT, Attachable | Storage}, // R8UI |
| 126 | {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F | 129 | {VK_FORMAT_R16G16B16A16_SFLOAT, Attachable | Storage}, // RGBA16F |
| 127 | {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U | 130 | {VK_FORMAT_R16G16B16A16_UNORM, Attachable | Storage}, // RGBA16U |
| 128 | {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S | 131 | {VK_FORMAT_R16G16B16A16_SNORM, Attachable | Storage}, // RGBA16S |
| 129 | {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI | 132 | {VK_FORMAT_R16G16B16A16_UINT, Attachable | Storage}, // RGBA16UI |
| 130 | {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F | 133 | {VK_FORMAT_B10G11R11_UFLOAT_PACK32, Attachable | Storage}, // R11FG11FB10F |
| 131 | {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI | 134 | {VK_FORMAT_R32G32B32A32_UINT, Attachable | Storage}, // RGBA32UI |
| 132 | {vk::Format::eBc1RgbaUnormBlock, {}}, // DXT1 | 135 | {VK_FORMAT_BC1_RGBA_UNORM_BLOCK}, // DXT1 |
| 133 | {vk::Format::eBc2UnormBlock, {}}, // DXT23 | 136 | {VK_FORMAT_BC2_UNORM_BLOCK}, // DXT23 |
| 134 | {vk::Format::eBc3UnormBlock, {}}, // DXT45 | 137 | {VK_FORMAT_BC3_UNORM_BLOCK}, // DXT45 |
| 135 | {vk::Format::eBc4UnormBlock, {}}, // DXN1 | 138 | {VK_FORMAT_BC4_UNORM_BLOCK}, // DXN1 |
| 136 | {vk::Format::eBc5UnormBlock, {}}, // DXN2UNORM | 139 | {VK_FORMAT_BC5_UNORM_BLOCK}, // DXN2UNORM |
| 137 | {vk::Format::eBc5SnormBlock, {}}, // DXN2SNORM | 140 | {VK_FORMAT_BC5_SNORM_BLOCK}, // DXN2SNORM |
| 138 | {vk::Format::eBc7UnormBlock, {}}, // BC7U | 141 | {VK_FORMAT_BC7_UNORM_BLOCK}, // BC7U |
| 139 | {vk::Format::eBc6HUfloatBlock, {}}, // BC6H_UF16 | 142 | {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 |
| 140 | {vk::Format::eBc6HSfloatBlock, {}}, // BC6H_SF16 | 143 | {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 |
| 141 | {vk::Format::eAstc4x4UnormBlock, {}}, // ASTC_2D_4X4 | 144 | {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 |
| 142 | {vk::Format::eB8G8R8A8Unorm, {}}, // BGRA8 | 145 | {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 |
| 143 | {vk::Format::eR32G32B32A32Sfloat, Attachable | Storage}, // RGBA32F | 146 | {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F |
| 144 | {vk::Format::eR32G32Sfloat, Attachable | Storage}, // RG32F | 147 | {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F |
| 145 | {vk::Format::eR32Sfloat, Attachable | Storage}, // R32F | 148 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F |
| 146 | {vk::Format::eR16Sfloat, Attachable | Storage}, // R16F | 149 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F |
| 147 | {vk::Format::eR16Unorm, Attachable | Storage}, // R16U | 150 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U |
| 148 | {vk::Format::eUndefined, {}}, // R16S | 151 | {VK_FORMAT_UNDEFINED}, // R16S |
| 149 | {vk::Format::eUndefined, {}}, // R16UI | 152 | {VK_FORMAT_UNDEFINED}, // R16UI |
| 150 | {vk::Format::eUndefined, {}}, // R16I | 153 | {VK_FORMAT_UNDEFINED}, // R16I |
| 151 | {vk::Format::eR16G16Unorm, Attachable | Storage}, // RG16 | 154 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 |
| 152 | {vk::Format::eR16G16Sfloat, Attachable | Storage}, // RG16F | 155 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F |
| 153 | {vk::Format::eUndefined, {}}, // RG16UI | 156 | {VK_FORMAT_UNDEFINED}, // RG16UI |
| 154 | {vk::Format::eUndefined, {}}, // RG16I | 157 | {VK_FORMAT_UNDEFINED}, // RG16I |
| 155 | {vk::Format::eR16G16Snorm, Attachable | Storage}, // RG16S | 158 | {VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // RG16S |
| 156 | {vk::Format::eUndefined, {}}, // RGB32F | 159 | {VK_FORMAT_UNDEFINED}, // RGB32F |
| 157 | {vk::Format::eR8G8B8A8Srgb, Attachable}, // RGBA8_SRGB | 160 | {VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // RGBA8_SRGB |
| 158 | {vk::Format::eR8G8Unorm, Attachable | Storage}, // RG8U | 161 | {VK_FORMAT_R8G8_UNORM, Attachable | Storage}, // RG8U |
| 159 | {vk::Format::eR8G8Snorm, Attachable | Storage}, // RG8S | 162 | {VK_FORMAT_R8G8_SNORM, Attachable | Storage}, // RG8S |
| 160 | {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI | 163 | {VK_FORMAT_R32G32_UINT, Attachable | Storage}, // RG32UI |
| 161 | {vk::Format::eUndefined, {}}, // RGBX16F | 164 | {VK_FORMAT_UNDEFINED}, // RGBX16F |
| 162 | {vk::Format::eR32Uint, Attachable | Storage}, // R32UI | 165 | {VK_FORMAT_R32_UINT, Attachable | Storage}, // R32UI |
| 163 | {vk::Format::eR32Sint, Attachable | Storage}, // R32I | 166 | {VK_FORMAT_R32_SINT, Attachable | Storage}, // R32I |
| 164 | {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 | 167 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 |
| 165 | {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 | 168 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 |
| 166 | {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 | 169 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 |
| 167 | {vk::Format::eUndefined, {}}, // BGRA8_SRGB | 170 | {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB |
| 168 | {vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB | 171 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB |
| 169 | {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB | 172 | {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB |
| 170 | {vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB | 173 | {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB |
| 171 | {vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB | 174 | {VK_FORMAT_BC7_SRGB_BLOCK}, // BC7U_SRGB |
| 172 | {vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U | 175 | {VK_FORMAT_R4G4B4A4_UNORM_PACK16, Attachable}, // R4G4B4A4U |
| 173 | {vk::Format::eAstc4x4SrgbBlock, {}}, // ASTC_2D_4X4_SRGB | 176 | {VK_FORMAT_ASTC_4x4_SRGB_BLOCK}, // ASTC_2D_4X4_SRGB |
| 174 | {vk::Format::eAstc8x8SrgbBlock, {}}, // ASTC_2D_8X8_SRGB | 177 | {VK_FORMAT_ASTC_8x8_SRGB_BLOCK}, // ASTC_2D_8X8_SRGB |
| 175 | {vk::Format::eAstc8x5SrgbBlock, {}}, // ASTC_2D_8X5_SRGB | 178 | {VK_FORMAT_ASTC_8x5_SRGB_BLOCK}, // ASTC_2D_8X5_SRGB |
| 176 | {vk::Format::eAstc5x4SrgbBlock, {}}, // ASTC_2D_5X4_SRGB | 179 | {VK_FORMAT_ASTC_5x4_SRGB_BLOCK}, // ASTC_2D_5X4_SRGB |
| 177 | {vk::Format::eAstc5x5UnormBlock, {}}, // ASTC_2D_5X5 | 180 | {VK_FORMAT_ASTC_5x5_UNORM_BLOCK}, // ASTC_2D_5X5 |
| 178 | {vk::Format::eAstc5x5SrgbBlock, {}}, // ASTC_2D_5X5_SRGB | 181 | {VK_FORMAT_ASTC_5x5_SRGB_BLOCK}, // ASTC_2D_5X5_SRGB |
| 179 | {vk::Format::eAstc10x8UnormBlock, {}}, // ASTC_2D_10X8 | 182 | {VK_FORMAT_ASTC_10x8_UNORM_BLOCK}, // ASTC_2D_10X8 |
| 180 | {vk::Format::eAstc10x8SrgbBlock, {}}, // ASTC_2D_10X8_SRGB | 183 | {VK_FORMAT_ASTC_10x8_SRGB_BLOCK}, // ASTC_2D_10X8_SRGB |
| 181 | {vk::Format::eAstc6x6UnormBlock, {}}, // ASTC_2D_6X6 | 184 | {VK_FORMAT_ASTC_6x6_UNORM_BLOCK}, // ASTC_2D_6X6 |
| 182 | {vk::Format::eAstc6x6SrgbBlock, {}}, // ASTC_2D_6X6_SRGB | 185 | {VK_FORMAT_ASTC_6x6_SRGB_BLOCK}, // ASTC_2D_6X6_SRGB |
| 183 | {vk::Format::eAstc10x10UnormBlock, {}}, // ASTC_2D_10X10 | 186 | {VK_FORMAT_ASTC_10x10_UNORM_BLOCK}, // ASTC_2D_10X10 |
| 184 | {vk::Format::eAstc10x10SrgbBlock, {}}, // ASTC_2D_10X10_SRGB | 187 | {VK_FORMAT_ASTC_10x10_SRGB_BLOCK}, // ASTC_2D_10X10_SRGB |
| 185 | {vk::Format::eAstc12x12UnormBlock, {}}, // ASTC_2D_12X12 | 188 | {VK_FORMAT_ASTC_12x12_UNORM_BLOCK}, // ASTC_2D_12X12 |
| 186 | {vk::Format::eAstc12x12SrgbBlock, {}}, // ASTC_2D_12X12_SRGB | 189 | {VK_FORMAT_ASTC_12x12_SRGB_BLOCK}, // ASTC_2D_12X12_SRGB |
| 187 | {vk::Format::eAstc8x6UnormBlock, {}}, // ASTC_2D_8X6 | 190 | {VK_FORMAT_ASTC_8x6_UNORM_BLOCK}, // ASTC_2D_8X6 |
| 188 | {vk::Format::eAstc8x6SrgbBlock, {}}, // ASTC_2D_8X6_SRGB | 191 | {VK_FORMAT_ASTC_8x6_SRGB_BLOCK}, // ASTC_2D_8X6_SRGB |
| 189 | {vk::Format::eAstc6x5UnormBlock, {}}, // ASTC_2D_6X5 | 192 | {VK_FORMAT_ASTC_6x5_UNORM_BLOCK}, // ASTC_2D_6X5 |
| 190 | {vk::Format::eAstc6x5SrgbBlock, {}}, // ASTC_2D_6X5_SRGB | 193 | {VK_FORMAT_ASTC_6x5_SRGB_BLOCK}, // ASTC_2D_6X5_SRGB |
| 191 | {vk::Format::eE5B9G9R9UfloatPack32, {}}, // E5B9G9R9F | 194 | {VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}, // E5B9G9R9F |
| 192 | 195 | ||
| 193 | // Depth formats | 196 | // Depth formats |
| 194 | {vk::Format::eD32Sfloat, Attachable}, // Z32F | 197 | {VK_FORMAT_D32_SFLOAT, Attachable}, // Z32F |
| 195 | {vk::Format::eD16Unorm, Attachable}, // Z16 | 198 | {VK_FORMAT_D16_UNORM, Attachable}, // Z16 |
| 196 | 199 | ||
| 197 | // DepthStencil formats | 200 | // DepthStencil formats |
| 198 | {vk::Format::eD24UnormS8Uint, Attachable}, // Z24S8 | 201 | {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // Z24S8 |
| 199 | {vk::Format::eD24UnormS8Uint, Attachable}, // S8Z24 (emulated) | 202 | {VK_FORMAT_D24_UNORM_S8_UINT, Attachable}, // S8Z24 (emulated) |
| 200 | {vk::Format::eD32SfloatS8Uint, Attachable}, // Z32FS8 | 203 | {VK_FORMAT_D32_SFLOAT_S8_UINT, Attachable}, // Z32FS8 |
| 201 | }; | 204 | }; |
| 202 | static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); | 205 | static_assert(std::size(tex_format_tuples) == VideoCore::Surface::MaxPixelFormat); |
| 203 | 206 | ||
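The table above pairs every guest pixel format with a host VkFormat plus a small usage bitmask, and the new `int usage = 0` default member initializer is what lets rows such as {VK_FORMAT_B5G6R5_UNORM_PACK16} drop the old `{}` placeholder. A minimal, self-contained sketch of the same table-driven pattern, using stand-in integer values instead of the real Vulkan and PixelFormat enums:

    #include <cassert>
    #include <cstddef>
    #include <iterator>

    enum : unsigned { Attachable = 1, Storage = 2 };

    struct FormatTuple {
        int format;     // stand-in for VkFormat
        int usage = 0;  // bitmask; defaults to "plain sampled format"
    } constexpr tuples[] = {
        {100, Attachable | Storage}, // a renderable, storage-capable format
        {101},                       // usage omitted, so it defaults to 0
    };

    // Index with the guest format, exactly as SurfaceFormat does below.
    constexpr FormatTuple Lookup(std::size_t pixel_format) {
        assert(pixel_format < std::size(tuples));
        return tuples[pixel_format];
    }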
| @@ -212,106 +215,106 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo | |||
| 212 | ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); | 215 | ASSERT(static_cast<std::size_t>(pixel_format) < std::size(tex_format_tuples)); |
| 213 | 216 | ||
| 214 | auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; | 217 | auto tuple = tex_format_tuples[static_cast<std::size_t>(pixel_format)]; |
| 215 | if (tuple.format == vk::Format::eUndefined) { | 218 | if (tuple.format == VK_FORMAT_UNDEFINED) { |
| 216 | UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", | 219 | UNIMPLEMENTED_MSG("Unimplemented texture format with pixel format={}", |
| 217 | static_cast<u32>(pixel_format)); | 220 | static_cast<u32>(pixel_format)); |
| 218 | return {vk::Format::eA8B8G8R8UnormPack32, true, true}; | 221 | return {VK_FORMAT_A8B8G8R8_UNORM_PACK32, true, true}; |
| 219 | } | 222 | } |
| 220 | 223 | ||
| 221 | // Use ABGR8 on hardware that doesn't support ASTC natively | 224 | // Use ABGR8 on hardware that doesn't support ASTC natively |
| 222 | if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { | 225 | if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { |
| 223 | tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) | 226 | tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format) |
| 224 | ? vk::Format::eA8B8G8R8SrgbPack32 | 227 | ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 |
| 225 | : vk::Format::eA8B8G8R8UnormPack32; | 228 | : VK_FORMAT_A8B8G8R8_UNORM_PACK32; |
| 226 | } | 229 | } |
| 227 | const bool attachable = tuple.usage & Attachable; | 230 | const bool attachable = tuple.usage & Attachable; |
| 228 | const bool storage = tuple.usage & Storage; | 231 | const bool storage = tuple.usage & Storage; |
| 229 | 232 | ||
| 230 | vk::FormatFeatureFlags usage; | 233 | VkFormatFeatureFlags usage; |
| 231 | if (format_type == FormatType::Buffer) { | 234 | if (format_type == FormatType::Buffer) { |
| 232 | usage = vk::FormatFeatureFlagBits::eStorageTexelBuffer | | 235 | usage = |
| 233 | vk::FormatFeatureFlagBits::eUniformTexelBuffer; | 236 | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; |
| 234 | } else { | 237 | } else { |
| 235 | usage = vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eTransferDst | | 238 | usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT | |
| 236 | vk::FormatFeatureFlagBits::eTransferSrc; | 239 | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT; |
| 237 | if (attachable) { | 240 | if (attachable) { |
| 238 | usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment | 241 | usage |= IsZetaFormat(pixel_format) ? VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT |
| 239 | : vk::FormatFeatureFlagBits::eColorAttachment; | 242 | : VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; |
| 240 | } | 243 | } |
| 241 | if (storage) { | 244 | if (storage) { |
| 242 | usage |= vk::FormatFeatureFlagBits::eStorageImage; | 245 | usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; |
| 243 | } | 246 | } |
| 244 | } | 247 | } |
| 245 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; | 248 | return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage}; |
| 246 | } | 249 | } |
| 247 | 250 | ||
| 248 | vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { | 251 | VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { |
| 249 | switch (stage) { | 252 | switch (stage) { |
| 250 | case Tegra::Engines::ShaderType::Vertex: | 253 | case Tegra::Engines::ShaderType::Vertex: |
| 251 | return vk::ShaderStageFlagBits::eVertex; | 254 | return VK_SHADER_STAGE_VERTEX_BIT; |
| 252 | case Tegra::Engines::ShaderType::TesselationControl: | 255 | case Tegra::Engines::ShaderType::TesselationControl: |
| 253 | return vk::ShaderStageFlagBits::eTessellationControl; | 256 | return VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; |
| 254 | case Tegra::Engines::ShaderType::TesselationEval: | 257 | case Tegra::Engines::ShaderType::TesselationEval: |
| 255 | return vk::ShaderStageFlagBits::eTessellationEvaluation; | 258 | return VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; |
| 256 | case Tegra::Engines::ShaderType::Geometry: | 259 | case Tegra::Engines::ShaderType::Geometry: |
| 257 | return vk::ShaderStageFlagBits::eGeometry; | 260 | return VK_SHADER_STAGE_GEOMETRY_BIT; |
| 258 | case Tegra::Engines::ShaderType::Fragment: | 261 | case Tegra::Engines::ShaderType::Fragment: |
| 259 | return vk::ShaderStageFlagBits::eFragment; | 262 | return VK_SHADER_STAGE_FRAGMENT_BIT; |
| 260 | case Tegra::Engines::ShaderType::Compute: | 263 | case Tegra::Engines::ShaderType::Compute: |
| 261 | return vk::ShaderStageFlagBits::eCompute; | 264 | return VK_SHADER_STAGE_COMPUTE_BIT; |
| 262 | } | 265 | } |
| 263 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); | 266 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); |
| 264 | return {}; | 267 | return {}; |
| 265 | } | 268 | } |
| 266 | 269 | ||
| 267 | vk::PrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, | 270 | VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, |
| 268 | Maxwell::PrimitiveTopology topology) { | 271 | Maxwell::PrimitiveTopology topology) { |
| 269 | switch (topology) { | 272 | switch (topology) { |
| 270 | case Maxwell::PrimitiveTopology::Points: | 273 | case Maxwell::PrimitiveTopology::Points: |
| 271 | return vk::PrimitiveTopology::ePointList; | 274 | return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; |
| 272 | case Maxwell::PrimitiveTopology::Lines: | 275 | case Maxwell::PrimitiveTopology::Lines: |
| 273 | return vk::PrimitiveTopology::eLineList; | 276 | return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; |
| 274 | case Maxwell::PrimitiveTopology::LineStrip: | 277 | case Maxwell::PrimitiveTopology::LineStrip: |
| 275 | return vk::PrimitiveTopology::eLineStrip; | 278 | return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; |
| 276 | case Maxwell::PrimitiveTopology::Triangles: | 279 | case Maxwell::PrimitiveTopology::Triangles: |
| 277 | return vk::PrimitiveTopology::eTriangleList; | 280 | return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; |
| 278 | case Maxwell::PrimitiveTopology::TriangleStrip: | 281 | case Maxwell::PrimitiveTopology::TriangleStrip: |
| 279 | return vk::PrimitiveTopology::eTriangleStrip; | 282 | return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; |
| 280 | case Maxwell::PrimitiveTopology::TriangleFan: | 283 | case Maxwell::PrimitiveTopology::TriangleFan: |
| 281 | return vk::PrimitiveTopology::eTriangleFan; | 284 | return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; |
| 282 | case Maxwell::PrimitiveTopology::Quads: | 285 | case Maxwell::PrimitiveTopology::Quads: |
| 283 | // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases | 286 | // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases |
| 284 | return vk::PrimitiveTopology::eTriangleList; | 287 | return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; |
| 285 | case Maxwell::PrimitiveTopology::Patches: | 288 | case Maxwell::PrimitiveTopology::Patches: |
| 286 | return vk::PrimitiveTopology::ePatchList; | 289 | return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; |
| 287 | default: | 290 | default: |
| 288 | UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); | 291 | UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); |
| 289 | return {}; | 292 | return {}; |
| 290 | } | 293 | } |
| 291 | } | 294 | } |
| 292 | 295 | ||
| 293 | vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { | 296 | VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { |
| 294 | switch (type) { | 297 | switch (type) { |
| 295 | case Maxwell::VertexAttribute::Type::SignedNorm: | 298 | case Maxwell::VertexAttribute::Type::SignedNorm: |
| 296 | switch (size) { | 299 | switch (size) { |
| 297 | case Maxwell::VertexAttribute::Size::Size_8: | 300 | case Maxwell::VertexAttribute::Size::Size_8: |
| 298 | return vk::Format::eR8Snorm; | 301 | return VK_FORMAT_R8_SNORM; |
| 299 | case Maxwell::VertexAttribute::Size::Size_8_8: | 302 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 300 | return vk::Format::eR8G8Snorm; | 303 | return VK_FORMAT_R8G8_SNORM; |
| 301 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 304 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 302 | return vk::Format::eR8G8B8Snorm; | 305 | return VK_FORMAT_R8G8B8_SNORM; |
| 303 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 306 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 304 | return vk::Format::eR8G8B8A8Snorm; | 307 | return VK_FORMAT_R8G8B8A8_SNORM; |
| 305 | case Maxwell::VertexAttribute::Size::Size_16: | 308 | case Maxwell::VertexAttribute::Size::Size_16: |
| 306 | return vk::Format::eR16Snorm; | 309 | return VK_FORMAT_R16_SNORM; |
| 307 | case Maxwell::VertexAttribute::Size::Size_16_16: | 310 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 308 | return vk::Format::eR16G16Snorm; | 311 | return VK_FORMAT_R16G16_SNORM; |
| 309 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 312 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 310 | return vk::Format::eR16G16B16Snorm; | 313 | return VK_FORMAT_R16G16B16_SNORM; |
| 311 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 314 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 312 | return vk::Format::eR16G16B16A16Snorm; | 315 | return VK_FORMAT_R16G16B16A16_SNORM; |
| 313 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | 316 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: |
| 314 | return vk::Format::eA2B10G10R10SnormPack32; | 317 | return VK_FORMAT_A2B10G10R10_SNORM_PACK32; |
| 315 | default: | 318 | default: |
| 316 | break; | 319 | break; |
| 317 | } | 320 | } |
| @@ -319,23 +322,23 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 319 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | 322 | case Maxwell::VertexAttribute::Type::UnsignedNorm: |
| 320 | switch (size) { | 323 | switch (size) { |
| 321 | case Maxwell::VertexAttribute::Size::Size_8: | 324 | case Maxwell::VertexAttribute::Size::Size_8: |
| 322 | return vk::Format::eR8Unorm; | 325 | return VK_FORMAT_R8_UNORM; |
| 323 | case Maxwell::VertexAttribute::Size::Size_8_8: | 326 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 324 | return vk::Format::eR8G8Unorm; | 327 | return VK_FORMAT_R8G8_UNORM; |
| 325 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 328 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 326 | return vk::Format::eR8G8B8Unorm; | 329 | return VK_FORMAT_R8G8B8_UNORM; |
| 327 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 330 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 328 | return vk::Format::eR8G8B8A8Unorm; | 331 | return VK_FORMAT_R8G8B8A8_UNORM; |
| 329 | case Maxwell::VertexAttribute::Size::Size_16: | 332 | case Maxwell::VertexAttribute::Size::Size_16: |
| 330 | return vk::Format::eR16Unorm; | 333 | return VK_FORMAT_R16_UNORM; |
| 331 | case Maxwell::VertexAttribute::Size::Size_16_16: | 334 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 332 | return vk::Format::eR16G16Unorm; | 335 | return VK_FORMAT_R16G16_UNORM; |
| 333 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 336 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 334 | return vk::Format::eR16G16B16Unorm; | 337 | return VK_FORMAT_R16G16B16_UNORM; |
| 335 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 338 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 336 | return vk::Format::eR16G16B16A16Unorm; | 339 | return VK_FORMAT_R16G16B16A16_UNORM; |
| 337 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: | 340 | case Maxwell::VertexAttribute::Size::Size_10_10_10_2: |
| 338 | return vk::Format::eA2B10G10R10UnormPack32; | 341 | return VK_FORMAT_A2B10G10R10_UNORM_PACK32; |
| 339 | default: | 342 | default: |
| 340 | break; | 343 | break; |
| 341 | } | 344 | } |
| @@ -343,59 +346,69 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 343 | case Maxwell::VertexAttribute::Type::SignedInt: | 346 | case Maxwell::VertexAttribute::Type::SignedInt: |
| 344 | switch (size) { | 347 | switch (size) { |
| 345 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 348 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 346 | return vk::Format::eR16G16B16A16Sint; | 349 | return VK_FORMAT_R16G16B16A16_SINT; |
| 347 | case Maxwell::VertexAttribute::Size::Size_8: | 350 | case Maxwell::VertexAttribute::Size::Size_8: |
| 348 | return vk::Format::eR8Sint; | 351 | return VK_FORMAT_R8_SINT; |
| 349 | case Maxwell::VertexAttribute::Size::Size_8_8: | 352 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 350 | return vk::Format::eR8G8Sint; | 353 | return VK_FORMAT_R8G8_SINT; |
| 351 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 354 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 352 | return vk::Format::eR8G8B8Sint; | 355 | return VK_FORMAT_R8G8B8_SINT; |
| 353 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 356 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 354 | return vk::Format::eR8G8B8A8Sint; | 357 | return VK_FORMAT_R8G8B8A8_SINT; |
| 355 | case Maxwell::VertexAttribute::Size::Size_32: | 358 | case Maxwell::VertexAttribute::Size::Size_32: |
| 356 | return vk::Format::eR32Sint; | 359 | return VK_FORMAT_R32_SINT; |
| 357 | default: | 360 | default: |
| 358 | break; | 361 | break; |
| 359 | } | 362 | } |
| 363 | break; | ||
| 360 | case Maxwell::VertexAttribute::Type::UnsignedInt: | 364 | case Maxwell::VertexAttribute::Type::UnsignedInt: |
| 361 | switch (size) { | 365 | switch (size) { |
| 362 | case Maxwell::VertexAttribute::Size::Size_8: | 366 | case Maxwell::VertexAttribute::Size::Size_8: |
| 363 | return vk::Format::eR8Uint; | 367 | return VK_FORMAT_R8_UINT; |
| 364 | case Maxwell::VertexAttribute::Size::Size_8_8: | 368 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 365 | return vk::Format::eR8G8Uint; | 369 | return VK_FORMAT_R8G8_UINT; |
| 366 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 370 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 367 | return vk::Format::eR8G8B8Uint; | 371 | return VK_FORMAT_R8G8B8_UINT; |
| 368 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 372 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 369 | return vk::Format::eR8G8B8A8Uint; | 373 | return VK_FORMAT_R8G8B8A8_UINT; |
| 374 | case Maxwell::VertexAttribute::Size::Size_16: | ||
| 375 | return VK_FORMAT_R16_UINT; | ||
| 376 | case Maxwell::VertexAttribute::Size::Size_16_16: | ||
| 377 | return VK_FORMAT_R16G16_UINT; | ||
| 378 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | ||
| 379 | return VK_FORMAT_R16G16B16_UINT; | ||
| 380 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | ||
| 381 | return VK_FORMAT_R16G16B16A16_UINT; | ||
| 370 | case Maxwell::VertexAttribute::Size::Size_32: | 382 | case Maxwell::VertexAttribute::Size::Size_32: |
| 371 | return vk::Format::eR32Uint; | 383 | return VK_FORMAT_R32_UINT; |
| 372 | case Maxwell::VertexAttribute::Size::Size_32_32: | 384 | case Maxwell::VertexAttribute::Size::Size_32_32: |
| 373 | return vk::Format::eR32G32Uint; | 385 | return VK_FORMAT_R32G32_UINT; |
| 374 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | 386 | case Maxwell::VertexAttribute::Size::Size_32_32_32: |
| 375 | return vk::Format::eR32G32B32Uint; | 387 | return VK_FORMAT_R32G32B32_UINT; |
| 376 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | 388 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: |
| 377 | return vk::Format::eR32G32B32A32Uint; | 389 | return VK_FORMAT_R32G32B32A32_UINT; |
| 378 | default: | 390 | default: |
| 379 | break; | 391 | break; |
| 380 | } | 392 | } |
| 393 | break; | ||
| 381 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | 394 | case Maxwell::VertexAttribute::Type::UnsignedScaled: |
| 382 | switch (size) { | 395 | switch (size) { |
| 383 | case Maxwell::VertexAttribute::Size::Size_8: | 396 | case Maxwell::VertexAttribute::Size::Size_8: |
| 384 | return vk::Format::eR8Uscaled; | 397 | return VK_FORMAT_R8_USCALED; |
| 385 | case Maxwell::VertexAttribute::Size::Size_8_8: | 398 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 386 | return vk::Format::eR8G8Uscaled; | 399 | return VK_FORMAT_R8G8_USCALED; |
| 387 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 400 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 388 | return vk::Format::eR8G8B8Uscaled; | 401 | return VK_FORMAT_R8G8B8_USCALED; |
| 389 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 402 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 390 | return vk::Format::eR8G8B8A8Uscaled; | 403 | return VK_FORMAT_R8G8B8A8_USCALED; |
| 391 | case Maxwell::VertexAttribute::Size::Size_16: | 404 | case Maxwell::VertexAttribute::Size::Size_16: |
| 392 | return vk::Format::eR16Uscaled; | 405 | return VK_FORMAT_R16_USCALED; |
| 393 | case Maxwell::VertexAttribute::Size::Size_16_16: | 406 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 394 | return vk::Format::eR16G16Uscaled; | 407 | return VK_FORMAT_R16G16_USCALED; |
| 395 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 408 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 396 | return vk::Format::eR16G16B16Uscaled; | 409 | return VK_FORMAT_R16G16B16_USCALED; |
| 397 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 410 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 398 | return vk::Format::eR16G16B16A16Uscaled; | 411 | return VK_FORMAT_R16G16B16A16_USCALED; |
| 399 | default: | 412 | default: |
| 400 | break; | 413 | break; |
| 401 | } | 414 | } |
| @@ -403,21 +416,21 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 403 | case Maxwell::VertexAttribute::Type::SignedScaled: | 416 | case Maxwell::VertexAttribute::Type::SignedScaled: |
| 404 | switch (size) { | 417 | switch (size) { |
| 405 | case Maxwell::VertexAttribute::Size::Size_8: | 418 | case Maxwell::VertexAttribute::Size::Size_8: |
| 406 | return vk::Format::eR8Sscaled; | 419 | return VK_FORMAT_R8_SSCALED; |
| 407 | case Maxwell::VertexAttribute::Size::Size_8_8: | 420 | case Maxwell::VertexAttribute::Size::Size_8_8: |
| 408 | return vk::Format::eR8G8Sscaled; | 421 | return VK_FORMAT_R8G8_SSCALED; |
| 409 | case Maxwell::VertexAttribute::Size::Size_8_8_8: | 422 | case Maxwell::VertexAttribute::Size::Size_8_8_8: |
| 410 | return vk::Format::eR8G8B8Sscaled; | 423 | return VK_FORMAT_R8G8B8_SSCALED; |
| 411 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | 424 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: |
| 412 | return vk::Format::eR8G8B8A8Sscaled; | 425 | return VK_FORMAT_R8G8B8A8_SSCALED; |
| 413 | case Maxwell::VertexAttribute::Size::Size_16: | 426 | case Maxwell::VertexAttribute::Size::Size_16: |
| 414 | return vk::Format::eR16Sscaled; | 427 | return VK_FORMAT_R16_SSCALED; |
| 415 | case Maxwell::VertexAttribute::Size::Size_16_16: | 428 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 416 | return vk::Format::eR16G16Sscaled; | 429 | return VK_FORMAT_R16G16_SSCALED; |
| 417 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 430 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 418 | return vk::Format::eR16G16B16Sscaled; | 431 | return VK_FORMAT_R16G16B16_SSCALED; |
| 419 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 432 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 420 | return vk::Format::eR16G16B16A16Sscaled; | 433 | return VK_FORMAT_R16G16B16A16_SSCALED; |
| 421 | default: | 434 | default: |
| 422 | break; | 435 | break; |
| 423 | } | 436 | } |
| @@ -425,21 +438,21 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 425 | case Maxwell::VertexAttribute::Type::Float: | 438 | case Maxwell::VertexAttribute::Type::Float: |
| 426 | switch (size) { | 439 | switch (size) { |
| 427 | case Maxwell::VertexAttribute::Size::Size_32: | 440 | case Maxwell::VertexAttribute::Size::Size_32: |
| 428 | return vk::Format::eR32Sfloat; | 441 | return VK_FORMAT_R32_SFLOAT; |
| 429 | case Maxwell::VertexAttribute::Size::Size_32_32: | 442 | case Maxwell::VertexAttribute::Size::Size_32_32: |
| 430 | return vk::Format::eR32G32Sfloat; | 443 | return VK_FORMAT_R32G32_SFLOAT; |
| 431 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | 444 | case Maxwell::VertexAttribute::Size::Size_32_32_32: |
| 432 | return vk::Format::eR32G32B32Sfloat; | 445 | return VK_FORMAT_R32G32B32_SFLOAT; |
| 433 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | 446 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: |
| 434 | return vk::Format::eR32G32B32A32Sfloat; | 447 | return VK_FORMAT_R32G32B32A32_SFLOAT; |
| 435 | case Maxwell::VertexAttribute::Size::Size_16: | 448 | case Maxwell::VertexAttribute::Size::Size_16: |
| 436 | return vk::Format::eR16Sfloat; | 449 | return VK_FORMAT_R16_SFLOAT; |
| 437 | case Maxwell::VertexAttribute::Size::Size_16_16: | 450 | case Maxwell::VertexAttribute::Size::Size_16_16: |
| 438 | return vk::Format::eR16G16Sfloat; | 451 | return VK_FORMAT_R16G16_SFLOAT; |
| 439 | case Maxwell::VertexAttribute::Size::Size_16_16_16: | 452 | case Maxwell::VertexAttribute::Size::Size_16_16_16: |
| 440 | return vk::Format::eR16G16B16Sfloat; | 453 | return VK_FORMAT_R16G16B16_SFLOAT; |
| 441 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: | 454 | case Maxwell::VertexAttribute::Size::Size_16_16_16_16: |
| 442 | return vk::Format::eR16G16B16A16Sfloat; | 455 | return VK_FORMAT_R16G16B16A16_SFLOAT; |
| 443 | default: | 456 | default: |
| 444 | break; | 457 | break; |
| 445 | } | 458 | } |
| @@ -450,210 +463,210 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr | |||
| 450 | return {}; | 463 | return {}; |
| 451 | } | 464 | } |
| 452 | 465 | ||
| 453 | vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { | 466 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { |
| 454 | switch (comparison) { | 467 | switch (comparison) { |
| 455 | case Maxwell::ComparisonOp::Never: | 468 | case Maxwell::ComparisonOp::Never: |
| 456 | case Maxwell::ComparisonOp::NeverOld: | 469 | case Maxwell::ComparisonOp::NeverOld: |
| 457 | return vk::CompareOp::eNever; | 470 | return VK_COMPARE_OP_NEVER; |
| 458 | case Maxwell::ComparisonOp::Less: | 471 | case Maxwell::ComparisonOp::Less: |
| 459 | case Maxwell::ComparisonOp::LessOld: | 472 | case Maxwell::ComparisonOp::LessOld: |
| 460 | return vk::CompareOp::eLess; | 473 | return VK_COMPARE_OP_LESS; |
| 461 | case Maxwell::ComparisonOp::Equal: | 474 | case Maxwell::ComparisonOp::Equal: |
| 462 | case Maxwell::ComparisonOp::EqualOld: | 475 | case Maxwell::ComparisonOp::EqualOld: |
| 463 | return vk::CompareOp::eEqual; | 476 | return VK_COMPARE_OP_EQUAL; |
| 464 | case Maxwell::ComparisonOp::LessEqual: | 477 | case Maxwell::ComparisonOp::LessEqual: |
| 465 | case Maxwell::ComparisonOp::LessEqualOld: | 478 | case Maxwell::ComparisonOp::LessEqualOld: |
| 466 | return vk::CompareOp::eLessOrEqual; | 479 | return VK_COMPARE_OP_LESS_OR_EQUAL; |
| 467 | case Maxwell::ComparisonOp::Greater: | 480 | case Maxwell::ComparisonOp::Greater: |
| 468 | case Maxwell::ComparisonOp::GreaterOld: | 481 | case Maxwell::ComparisonOp::GreaterOld: |
| 469 | return vk::CompareOp::eGreater; | 482 | return VK_COMPARE_OP_GREATER; |
| 470 | case Maxwell::ComparisonOp::NotEqual: | 483 | case Maxwell::ComparisonOp::NotEqual: |
| 471 | case Maxwell::ComparisonOp::NotEqualOld: | 484 | case Maxwell::ComparisonOp::NotEqualOld: |
| 472 | return vk::CompareOp::eNotEqual; | 485 | return VK_COMPARE_OP_NOT_EQUAL; |
| 473 | case Maxwell::ComparisonOp::GreaterEqual: | 486 | case Maxwell::ComparisonOp::GreaterEqual: |
| 474 | case Maxwell::ComparisonOp::GreaterEqualOld: | 487 | case Maxwell::ComparisonOp::GreaterEqualOld: |
| 475 | return vk::CompareOp::eGreaterOrEqual; | 488 | return VK_COMPARE_OP_GREATER_OR_EQUAL; |
| 476 | case Maxwell::ComparisonOp::Always: | 489 | case Maxwell::ComparisonOp::Always: |
| 477 | case Maxwell::ComparisonOp::AlwaysOld: | 490 | case Maxwell::ComparisonOp::AlwaysOld: |
| 478 | return vk::CompareOp::eAlways; | 491 | return VK_COMPARE_OP_ALWAYS; |
| 479 | } | 492 | } |
| 480 | UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); | 493 | UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); |
| 481 | return {}; | 494 | return {}; |
| 482 | } | 495 | } |
| 483 | 496 | ||
| 484 | vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { | 497 | VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format) { |
| 485 | switch (index_format) { | 498 | switch (index_format) { |
| 486 | case Maxwell::IndexFormat::UnsignedByte: | 499 | case Maxwell::IndexFormat::UnsignedByte: |
| 487 | if (!device.IsExtIndexTypeUint8Supported()) { | 500 | if (!device.IsExtIndexTypeUint8Supported()) { |
| 488 | UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); | 501 | UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); |
| 489 | return vk::IndexType::eUint16; | 502 | return VK_INDEX_TYPE_UINT16; |
| 490 | } | 503 | } |
| 491 | return vk::IndexType::eUint8EXT; | 504 | return VK_INDEX_TYPE_UINT8_EXT; |
| 492 | case Maxwell::IndexFormat::UnsignedShort: | 505 | case Maxwell::IndexFormat::UnsignedShort: |
| 493 | return vk::IndexType::eUint16; | 506 | return VK_INDEX_TYPE_UINT16; |
| 494 | case Maxwell::IndexFormat::UnsignedInt: | 507 | case Maxwell::IndexFormat::UnsignedInt: |
| 495 | return vk::IndexType::eUint32; | 508 | return VK_INDEX_TYPE_UINT32; |
| 496 | } | 509 | } |
| 497 | UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); | 510 | UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); |
| 498 | return {}; | 511 | return {}; |
| 499 | } | 512 | } |
| 500 | 513 | ||
| 501 | vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { | 514 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op) { |
| 502 | switch (stencil_op) { | 515 | switch (stencil_op) { |
| 503 | case Maxwell::StencilOp::Keep: | 516 | case Maxwell::StencilOp::Keep: |
| 504 | case Maxwell::StencilOp::KeepOGL: | 517 | case Maxwell::StencilOp::KeepOGL: |
| 505 | return vk::StencilOp::eKeep; | 518 | return VK_STENCIL_OP_KEEP; |
| 506 | case Maxwell::StencilOp::Zero: | 519 | case Maxwell::StencilOp::Zero: |
| 507 | case Maxwell::StencilOp::ZeroOGL: | 520 | case Maxwell::StencilOp::ZeroOGL: |
| 508 | return vk::StencilOp::eZero; | 521 | return VK_STENCIL_OP_ZERO; |
| 509 | case Maxwell::StencilOp::Replace: | 522 | case Maxwell::StencilOp::Replace: |
| 510 | case Maxwell::StencilOp::ReplaceOGL: | 523 | case Maxwell::StencilOp::ReplaceOGL: |
| 511 | return vk::StencilOp::eReplace; | 524 | return VK_STENCIL_OP_REPLACE; |
| 512 | case Maxwell::StencilOp::Incr: | 525 | case Maxwell::StencilOp::Incr: |
| 513 | case Maxwell::StencilOp::IncrOGL: | 526 | case Maxwell::StencilOp::IncrOGL: |
| 514 | return vk::StencilOp::eIncrementAndClamp; | 527 | return VK_STENCIL_OP_INCREMENT_AND_CLAMP; |
| 515 | case Maxwell::StencilOp::Decr: | 528 | case Maxwell::StencilOp::Decr: |
| 516 | case Maxwell::StencilOp::DecrOGL: | 529 | case Maxwell::StencilOp::DecrOGL: |
| 517 | return vk::StencilOp::eDecrementAndClamp; | 530 | return VK_STENCIL_OP_DECREMENT_AND_CLAMP; |
| 518 | case Maxwell::StencilOp::Invert: | 531 | case Maxwell::StencilOp::Invert: |
| 519 | case Maxwell::StencilOp::InvertOGL: | 532 | case Maxwell::StencilOp::InvertOGL: |
| 520 | return vk::StencilOp::eInvert; | 533 | return VK_STENCIL_OP_INVERT; |
| 521 | case Maxwell::StencilOp::IncrWrap: | 534 | case Maxwell::StencilOp::IncrWrap: |
| 522 | case Maxwell::StencilOp::IncrWrapOGL: | 535 | case Maxwell::StencilOp::IncrWrapOGL: |
| 523 | return vk::StencilOp::eIncrementAndWrap; | 536 | return VK_STENCIL_OP_INCREMENT_AND_WRAP; |
| 524 | case Maxwell::StencilOp::DecrWrap: | 537 | case Maxwell::StencilOp::DecrWrap: |
| 525 | case Maxwell::StencilOp::DecrWrapOGL: | 538 | case Maxwell::StencilOp::DecrWrapOGL: |
| 526 | return vk::StencilOp::eDecrementAndWrap; | 539 | return VK_STENCIL_OP_DECREMENT_AND_WRAP; |
| 527 | } | 540 | } |
| 528 | UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); | 541 | UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); |
| 529 | return {}; | 542 | return {}; |
| 530 | } | 543 | } |
| 531 | 544 | ||
| 532 | vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { | 545 | VkBlendOp BlendEquation(Maxwell::Blend::Equation equation) { |
| 533 | switch (equation) { | 546 | switch (equation) { |
| 534 | case Maxwell::Blend::Equation::Add: | 547 | case Maxwell::Blend::Equation::Add: |
| 535 | case Maxwell::Blend::Equation::AddGL: | 548 | case Maxwell::Blend::Equation::AddGL: |
| 536 | return vk::BlendOp::eAdd; | 549 | return VK_BLEND_OP_ADD; |
| 537 | case Maxwell::Blend::Equation::Subtract: | 550 | case Maxwell::Blend::Equation::Subtract: |
| 538 | case Maxwell::Blend::Equation::SubtractGL: | 551 | case Maxwell::Blend::Equation::SubtractGL: |
| 539 | return vk::BlendOp::eSubtract; | 552 | return VK_BLEND_OP_SUBTRACT; |
| 540 | case Maxwell::Blend::Equation::ReverseSubtract: | 553 | case Maxwell::Blend::Equation::ReverseSubtract: |
| 541 | case Maxwell::Blend::Equation::ReverseSubtractGL: | 554 | case Maxwell::Blend::Equation::ReverseSubtractGL: |
| 542 | return vk::BlendOp::eReverseSubtract; | 555 | return VK_BLEND_OP_REVERSE_SUBTRACT; |
| 543 | case Maxwell::Blend::Equation::Min: | 556 | case Maxwell::Blend::Equation::Min: |
| 544 | case Maxwell::Blend::Equation::MinGL: | 557 | case Maxwell::Blend::Equation::MinGL: |
| 545 | return vk::BlendOp::eMin; | 558 | return VK_BLEND_OP_MIN; |
| 546 | case Maxwell::Blend::Equation::Max: | 559 | case Maxwell::Blend::Equation::Max: |
| 547 | case Maxwell::Blend::Equation::MaxGL: | 560 | case Maxwell::Blend::Equation::MaxGL: |
| 548 | return vk::BlendOp::eMax; | 561 | return VK_BLEND_OP_MAX; |
| 549 | } | 562 | } |
| 550 | UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); | 563 | UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); |
| 551 | return {}; | 564 | return {}; |
| 552 | } | 565 | } |
| 553 | 566 | ||
| 554 | vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { | 567 | VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor) { |
| 555 | switch (factor) { | 568 | switch (factor) { |
| 556 | case Maxwell::Blend::Factor::Zero: | 569 | case Maxwell::Blend::Factor::Zero: |
| 557 | case Maxwell::Blend::Factor::ZeroGL: | 570 | case Maxwell::Blend::Factor::ZeroGL: |
| 558 | return vk::BlendFactor::eZero; | 571 | return VK_BLEND_FACTOR_ZERO; |
| 559 | case Maxwell::Blend::Factor::One: | 572 | case Maxwell::Blend::Factor::One: |
| 560 | case Maxwell::Blend::Factor::OneGL: | 573 | case Maxwell::Blend::Factor::OneGL: |
| 561 | return vk::BlendFactor::eOne; | 574 | return VK_BLEND_FACTOR_ONE; |
| 562 | case Maxwell::Blend::Factor::SourceColor: | 575 | case Maxwell::Blend::Factor::SourceColor: |
| 563 | case Maxwell::Blend::Factor::SourceColorGL: | 576 | case Maxwell::Blend::Factor::SourceColorGL: |
| 564 | return vk::BlendFactor::eSrcColor; | 577 | return VK_BLEND_FACTOR_SRC_COLOR; |
| 565 | case Maxwell::Blend::Factor::OneMinusSourceColor: | 578 | case Maxwell::Blend::Factor::OneMinusSourceColor: |
| 566 | case Maxwell::Blend::Factor::OneMinusSourceColorGL: | 579 | case Maxwell::Blend::Factor::OneMinusSourceColorGL: |
| 567 | return vk::BlendFactor::eOneMinusSrcColor; | 580 | return VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; |
| 568 | case Maxwell::Blend::Factor::SourceAlpha: | 581 | case Maxwell::Blend::Factor::SourceAlpha: |
| 569 | case Maxwell::Blend::Factor::SourceAlphaGL: | 582 | case Maxwell::Blend::Factor::SourceAlphaGL: |
| 570 | return vk::BlendFactor::eSrcAlpha; | 583 | return VK_BLEND_FACTOR_SRC_ALPHA; |
| 571 | case Maxwell::Blend::Factor::OneMinusSourceAlpha: | 584 | case Maxwell::Blend::Factor::OneMinusSourceAlpha: |
| 572 | case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: | 585 | case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: |
| 573 | return vk::BlendFactor::eOneMinusSrcAlpha; | 586 | return VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; |
| 574 | case Maxwell::Blend::Factor::DestAlpha: | 587 | case Maxwell::Blend::Factor::DestAlpha: |
| 575 | case Maxwell::Blend::Factor::DestAlphaGL: | 588 | case Maxwell::Blend::Factor::DestAlphaGL: |
| 576 | return vk::BlendFactor::eDstAlpha; | 589 | return VK_BLEND_FACTOR_DST_ALPHA; |
| 577 | case Maxwell::Blend::Factor::OneMinusDestAlpha: | 590 | case Maxwell::Blend::Factor::OneMinusDestAlpha: |
| 578 | case Maxwell::Blend::Factor::OneMinusDestAlphaGL: | 591 | case Maxwell::Blend::Factor::OneMinusDestAlphaGL: |
| 579 | return vk::BlendFactor::eOneMinusDstAlpha; | 592 | return VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; |
| 580 | case Maxwell::Blend::Factor::DestColor: | 593 | case Maxwell::Blend::Factor::DestColor: |
| 581 | case Maxwell::Blend::Factor::DestColorGL: | 594 | case Maxwell::Blend::Factor::DestColorGL: |
| 582 | return vk::BlendFactor::eDstColor; | 595 | return VK_BLEND_FACTOR_DST_COLOR; |
| 583 | case Maxwell::Blend::Factor::OneMinusDestColor: | 596 | case Maxwell::Blend::Factor::OneMinusDestColor: |
| 584 | case Maxwell::Blend::Factor::OneMinusDestColorGL: | 597 | case Maxwell::Blend::Factor::OneMinusDestColorGL: |
| 585 | return vk::BlendFactor::eOneMinusDstColor; | 598 | return VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; |
| 586 | case Maxwell::Blend::Factor::SourceAlphaSaturate: | 599 | case Maxwell::Blend::Factor::SourceAlphaSaturate: |
| 587 | case Maxwell::Blend::Factor::SourceAlphaSaturateGL: | 600 | case Maxwell::Blend::Factor::SourceAlphaSaturateGL: |
| 588 | return vk::BlendFactor::eSrcAlphaSaturate; | 601 | return VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; |
| 589 | case Maxwell::Blend::Factor::Source1Color: | 602 | case Maxwell::Blend::Factor::Source1Color: |
| 590 | case Maxwell::Blend::Factor::Source1ColorGL: | 603 | case Maxwell::Blend::Factor::Source1ColorGL: |
| 591 | return vk::BlendFactor::eSrc1Color; | 604 | return VK_BLEND_FACTOR_SRC1_COLOR; |
| 592 | case Maxwell::Blend::Factor::OneMinusSource1Color: | 605 | case Maxwell::Blend::Factor::OneMinusSource1Color: |
| 593 | case Maxwell::Blend::Factor::OneMinusSource1ColorGL: | 606 | case Maxwell::Blend::Factor::OneMinusSource1ColorGL: |
| 594 | return vk::BlendFactor::eOneMinusSrc1Color; | 607 | return VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR; |
| 595 | case Maxwell::Blend::Factor::Source1Alpha: | 608 | case Maxwell::Blend::Factor::Source1Alpha: |
| 596 | case Maxwell::Blend::Factor::Source1AlphaGL: | 609 | case Maxwell::Blend::Factor::Source1AlphaGL: |
| 597 | return vk::BlendFactor::eSrc1Alpha; | 610 | return VK_BLEND_FACTOR_SRC1_ALPHA; |
| 598 | case Maxwell::Blend::Factor::OneMinusSource1Alpha: | 611 | case Maxwell::Blend::Factor::OneMinusSource1Alpha: |
| 599 | case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: | 612 | case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: |
| 600 | return vk::BlendFactor::eOneMinusSrc1Alpha; | 613 | return VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA; |
| 601 | case Maxwell::Blend::Factor::ConstantColor: | 614 | case Maxwell::Blend::Factor::ConstantColor: |
| 602 | case Maxwell::Blend::Factor::ConstantColorGL: | 615 | case Maxwell::Blend::Factor::ConstantColorGL: |
| 603 | return vk::BlendFactor::eConstantColor; | 616 | return VK_BLEND_FACTOR_CONSTANT_COLOR; |
| 604 | case Maxwell::Blend::Factor::OneMinusConstantColor: | 617 | case Maxwell::Blend::Factor::OneMinusConstantColor: |
| 605 | case Maxwell::Blend::Factor::OneMinusConstantColorGL: | 618 | case Maxwell::Blend::Factor::OneMinusConstantColorGL: |
| 606 | return vk::BlendFactor::eOneMinusConstantColor; | 619 | return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR; |
| 607 | case Maxwell::Blend::Factor::ConstantAlpha: | 620 | case Maxwell::Blend::Factor::ConstantAlpha: |
| 608 | case Maxwell::Blend::Factor::ConstantAlphaGL: | 621 | case Maxwell::Blend::Factor::ConstantAlphaGL: |
| 609 | return vk::BlendFactor::eConstantAlpha; | 622 | return VK_BLEND_FACTOR_CONSTANT_ALPHA; |
| 610 | case Maxwell::Blend::Factor::OneMinusConstantAlpha: | 623 | case Maxwell::Blend::Factor::OneMinusConstantAlpha: |
| 611 | case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: | 624 | case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: |
| 612 | return vk::BlendFactor::eOneMinusConstantAlpha; | 625 | return VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA; |
| 613 | } | 626 | } |
| 614 | UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); | 627 | UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); |
| 615 | return {}; | 628 | return {}; |
| 616 | } | 629 | } |
| 617 | 630 | ||
| 618 | vk::FrontFace FrontFace(Maxwell::FrontFace front_face) { | 631 | VkFrontFace FrontFace(Maxwell::FrontFace front_face) { |
| 619 | switch (front_face) { | 632 | switch (front_face) { |
| 620 | case Maxwell::FrontFace::ClockWise: | 633 | case Maxwell::FrontFace::ClockWise: |
| 621 | return vk::FrontFace::eClockwise; | 634 | return VK_FRONT_FACE_CLOCKWISE; |
| 622 | case Maxwell::FrontFace::CounterClockWise: | 635 | case Maxwell::FrontFace::CounterClockWise: |
| 623 | return vk::FrontFace::eCounterClockwise; | 636 | return VK_FRONT_FACE_COUNTER_CLOCKWISE; |
| 624 | } | 637 | } |
| 625 | UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); | 638 | UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); |
| 626 | return {}; | 639 | return {}; |
| 627 | } | 640 | } |
| 628 | 641 | ||
| 629 | vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) { | 642 | VkCullModeFlags CullFace(Maxwell::CullFace cull_face) { |
| 630 | switch (cull_face) { | 643 | switch (cull_face) { |
| 631 | case Maxwell::CullFace::Front: | 644 | case Maxwell::CullFace::Front: |
| 632 | return vk::CullModeFlagBits::eFront; | 645 | return VK_CULL_MODE_FRONT_BIT; |
| 633 | case Maxwell::CullFace::Back: | 646 | case Maxwell::CullFace::Back: |
| 634 | return vk::CullModeFlagBits::eBack; | 647 | return VK_CULL_MODE_BACK_BIT; |
| 635 | case Maxwell::CullFace::FrontAndBack: | 648 | case Maxwell::CullFace::FrontAndBack: |
| 636 | return vk::CullModeFlagBits::eFrontAndBack; | 649 | return VK_CULL_MODE_FRONT_AND_BACK; |
| 637 | } | 650 | } |
| 638 | UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); | 651 | UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); |
| 639 | return {}; | 652 | return {}; |
| 640 | } | 653 | } |
| 641 | 654 | ||
| 642 | vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { | 655 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { |
| 643 | switch (swizzle) { | 656 | switch (swizzle) { |
| 644 | case Tegra::Texture::SwizzleSource::Zero: | 657 | case Tegra::Texture::SwizzleSource::Zero: |
| 645 | return vk::ComponentSwizzle::eZero; | 658 | return VK_COMPONENT_SWIZZLE_ZERO; |
| 646 | case Tegra::Texture::SwizzleSource::R: | 659 | case Tegra::Texture::SwizzleSource::R: |
| 647 | return vk::ComponentSwizzle::eR; | 660 | return VK_COMPONENT_SWIZZLE_R; |
| 648 | case Tegra::Texture::SwizzleSource::G: | 661 | case Tegra::Texture::SwizzleSource::G: |
| 649 | return vk::ComponentSwizzle::eG; | 662 | return VK_COMPONENT_SWIZZLE_G; |
| 650 | case Tegra::Texture::SwizzleSource::B: | 663 | case Tegra::Texture::SwizzleSource::B: |
| 651 | return vk::ComponentSwizzle::eB; | 664 | return VK_COMPONENT_SWIZZLE_B; |
| 652 | case Tegra::Texture::SwizzleSource::A: | 665 | case Tegra::Texture::SwizzleSource::A: |
| 653 | return vk::ComponentSwizzle::eA; | 666 | return VK_COMPONENT_SWIZZLE_A; |
| 654 | case Tegra::Texture::SwizzleSource::OneInt: | 667 | case Tegra::Texture::SwizzleSource::OneInt: |
| 655 | case Tegra::Texture::SwizzleSource::OneFloat: | 668 | case Tegra::Texture::SwizzleSource::OneFloat: |
| 656 | return vk::ComponentSwizzle::eOne; | 669 | return VK_COMPONENT_SWIZZLE_ONE; |
| 657 | } | 670 | } |
| 658 | UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); | 671 | UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); |
| 659 | return {}; | 672 | return {}; |
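Taken together, these translators now return values that drop straight into Vulkan's C structures. A hedged illustration of two typical call sites; the pipeline and vertex-input code is not part of this diff, and `regs`, `attribute`, and `index` are placeholders for Maxwell state:

    // Depth-stencil state: feed translated compare/stencil ops into the
    // plain C create-info struct (placeholder register names).
    VkPipelineDepthStencilStateCreateInfo ds{};
    ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
    ds.depthTestEnable = VK_TRUE;
    ds.depthCompareOp = MaxwellToVK::ComparisonOp(regs.depth_test_func);
    ds.front.failOp = MaxwellToVK::StencilOp(regs.stencil_front_op_fail);

    // Vertex input: VertexFormat's result is the attribute's VkFormat.
    VkVertexInputAttributeDescription attr{};
    attr.location = index;
    attr.binding = attribute.buffer;
    attr.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size);
    attr.offset = attribute.offset;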
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 24f6ab544..81bce4c6c 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -6,8 +6,8 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_device.h" | 9 | #include "video_core/renderer_vulkan/vk_device.h" |
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 11 | #include "video_core/surface.h" | 11 | #include "video_core/surface.h" |
| 12 | #include "video_core/textures/texture.h" | 12 | #include "video_core/textures/texture.h" |
| 13 | 13 | ||
| @@ -18,46 +18,45 @@ using PixelFormat = VideoCore::Surface::PixelFormat; | |||
| 18 | 18 | ||
| 19 | namespace Sampler { | 19 | namespace Sampler { |
| 20 | 20 | ||
| 21 | vk::Filter Filter(Tegra::Texture::TextureFilter filter); | 21 | VkFilter Filter(Tegra::Texture::TextureFilter filter); |
| 22 | 22 | ||
| 23 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); | 23 | VkSamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); |
| 24 | 24 | ||
| 25 | vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, | 25 | VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, |
| 26 | Tegra::Texture::TextureFilter filter); | 26 | Tegra::Texture::TextureFilter filter); |
| 27 | 27 | ||
| 28 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); | 28 | VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); |
| 29 | 29 | ||
| 30 | } // namespace Sampler | 30 | } // namespace Sampler |
| 31 | 31 | ||
| 32 | struct FormatInfo { | 32 | struct FormatInfo { |
| 33 | vk::Format format; | 33 | VkFormat format; |
| 34 | bool attachable; | 34 | bool attachable; |
| 35 | bool storage; | 35 | bool storage; |
| 36 | }; | 36 | }; |
| 37 | 37 | ||
| 38 | FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); | 38 | FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFormat pixel_format); |
| 39 | 39 | ||
| 40 | vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); | 40 | VkShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage); |
| 41 | 41 | ||
| 42 | vk::PrimitiveTopology PrimitiveTopology(const VKDevice& device, | 42 | VkPrimitiveTopology PrimitiveTopology(const VKDevice& device, Maxwell::PrimitiveTopology topology); |
| 43 | Maxwell::PrimitiveTopology topology); | ||
| 44 | 43 | ||
| 45 | vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); | 44 | VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); |
| 46 | 45 | ||
| 47 | vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison); | 46 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); |
| 48 | 47 | ||
| 49 | vk::IndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); | 48 | VkIndexType IndexFormat(const VKDevice& device, Maxwell::IndexFormat index_format); |
| 50 | 49 | ||
| 51 | vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op); | 50 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); |
| 52 | 51 | ||
| 53 | vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation); | 52 | VkBlendOp BlendEquation(Maxwell::Blend::Equation equation); |
| 54 | 53 | ||
| 55 | vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); | 54 | VkBlendFactor BlendFactor(Maxwell::Blend::Factor factor); |
| 56 | 55 | ||
| 57 | vk::FrontFace FrontFace(Maxwell::FrontFace front_face); | 56 | VkFrontFace FrontFace(Maxwell::FrontFace front_face); |
| 58 | 57 | ||
| 59 | vk::CullModeFlags CullFace(Maxwell::CullFace cull_face); | 58 | VkCullModeFlags CullFace(Maxwell::CullFace cull_face); |
| 60 | 59 | ||
| 61 | vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); | 60 | VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); |
| 62 | 61 | ||
| 63 | } // namespace Vulkan::MaxwellToVK | 62 | } // namespace Vulkan::MaxwellToVK |
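With declarations.h gone, including this header pulls in only the raw Vulkan C types through wrapper.h, so Vulkan-Hpp disappears from every consumer's include graph. A small consumption sketch; `device` and `pixel_format` are placeholders, and FormatType::Buffer is the one enumerator visible in this patch:

    // FormatInfo is an aggregate of three members, so a structured binding
    // unpacks the result directly.
    const auto [vk_format, attachable, storage] =
        MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, pixel_format);
    if (!storage) {
        // Hypothetical fallback when storage-image usage is unavailable.
    }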
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 6953aaafe..dd590c38b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -2,13 +2,18 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstring> | ||
| 5 | #include <memory> | 8 | #include <memory> |
| 6 | #include <optional> | 9 | #include <optional> |
| 10 | #include <string> | ||
| 7 | #include <vector> | 11 | #include <vector> |
| 8 | 12 | ||
| 9 | #include <fmt/format.h> | 13 | #include <fmt/format.h> |
| 10 | 14 | ||
| 11 | #include "common/assert.h" | 15 | #include "common/assert.h" |
| 16 | #include "common/dynamic_library.h" | ||
| 12 | #include "common/logging/log.h" | 17 | #include "common/logging/log.h" |
| 13 | #include "common/telemetry.h" | 18 | #include "common/telemetry.h" |
| 14 | #include "core/core.h" | 19 | #include "core/core.h" |
| @@ -19,7 +24,6 @@ | |||
| 19 | #include "core/settings.h" | 24 | #include "core/settings.h" |
| 20 | #include "core/telemetry_session.h" | 25 | #include "core/telemetry_session.h" |
| 21 | #include "video_core/gpu.h" | 26 | #include "video_core/gpu.h" |
| 22 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 23 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 27 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 24 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 28 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 25 | #include "video_core/renderer_vulkan/vk_device.h" | 29 | #include "video_core/renderer_vulkan/vk_device.h" |
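The hunk below swaps compile-time linking against the Vulkan loader for runtime loading through the new Common::DynamicLibrary. A condensed sketch of the bootstrap order it implements (error handling trimmed; the boolean-conversion check on vk::Instance is assumed from the wrapper, not shown in this patch):

    // Open the platform's Vulkan loader at runtime, then let CreateInstance
    // resolve vkGetInstanceProcAddr and load the remaining entry points
    // through vk::Load.
    Common::DynamicLibrary library = OpenVulkanLibrary();
    vk::InstanceDispatch dld;
    vk::Instance instance =
        CreateInstance(library, dld, WindowSystemType::Windows, /*enable_layers=*/true);
    if (!instance) {
        // Loader missing or instance creation failed; abort Vulkan init.
    }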
| @@ -29,30 +33,145 @@ | |||
| 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 33 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 30 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 34 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 31 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 35 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 36 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 37 | |||
| 38 | // Include these late to avoid polluting previous headers | ||
| 39 | #ifdef _WIN32 | ||
| 40 | #include <windows.h> | ||
| 41 | // ensure include order | ||
| 42 | #include <vulkan/vulkan_win32.h> | ||
| 43 | #endif | ||
| 44 | |||
| 45 | #ifdef __linux__ | ||
| 46 | #include <X11/Xlib.h> | ||
| 47 | #include <vulkan/vulkan_wayland.h> | ||
| 48 | #include <vulkan/vulkan_xlib.h> | ||
| 49 | #endif | ||
| 32 | 50 | ||
| 33 | namespace Vulkan { | 51 | namespace Vulkan { |
| 34 | 52 | ||
| 35 | namespace { | 53 | namespace { |
| 36 | 54 | ||
| 37 | VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, | 55 | using Core::Frontend::WindowSystemType; |
| 56 | |||
| 57 | VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, | ||
| 38 | VkDebugUtilsMessageTypeFlagsEXT type, | 58 | VkDebugUtilsMessageTypeFlagsEXT type, |
| 39 | const VkDebugUtilsMessengerCallbackDataEXT* data, | 59 | const VkDebugUtilsMessengerCallbackDataEXT* data, |
| 40 | [[maybe_unused]] void* user_data) { | 60 | [[maybe_unused]] void* user_data) { |
| 41 | const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_}; | ||
| 42 | const char* message{data->pMessage}; | 61 | const char* message{data->pMessage}; |
| 43 | 62 | ||
| 44 | if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) { | 63 | if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { |
| 45 | LOG_CRITICAL(Render_Vulkan, "{}", message); | 64 | LOG_CRITICAL(Render_Vulkan, "{}", message); |
| 46 | } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning) { | 65 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) { |
| 47 | LOG_WARNING(Render_Vulkan, "{}", message); | 66 | LOG_WARNING(Render_Vulkan, "{}", message); |
| 48 | } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo) { | 67 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT) { |
| 49 | LOG_INFO(Render_Vulkan, "{}", message); | 68 | LOG_INFO(Render_Vulkan, "{}", message); |
| 50 | } else if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose) { | 69 | } else if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT) { |
| 51 | LOG_DEBUG(Render_Vulkan, "{}", message); | 70 | LOG_DEBUG(Render_Vulkan, "{}", message); |
| 52 | } | 71 | } |
| 53 | return VK_FALSE; | 72 | return VK_FALSE; |
| 54 | } | 73 | } |
| 55 | 74 | ||
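Note: DebugCallback above is plain C-ABI glue; VK_EXT_debug_utils hands it every message and the severity bits are mapped onto the emulator's log levels. For orientation, registering such a callback against a raw VkInstance looks roughly like the sketch below; the `instance` handle and the manual entry-point resolution are assumptions of the sketch, since the wrapper call used later in this commit (TryCreateDebugCallback) hides these steps.

    // Sketch, assuming `instance` is a valid VkInstance created with
    // VK_EXT_DEBUG_UTILS_EXTENSION_NAME enabled.
    VkDebugUtilsMessengerCreateInfoEXT ci{};
    ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
    ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT |
                         VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                         VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT |
                         VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT;
    ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                     VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                     VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
    ci.pfnUserCallback = DebugCallback;
    // Extension commands must be fetched through vkGetInstanceProcAddr.
    const auto create = reinterpret_cast<PFN_vkCreateDebugUtilsMessengerEXT>(
        vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT"));
    VkDebugUtilsMessengerEXT messenger = VK_NULL_HANDLE;
    if (create != nullptr) {
        create(instance, &ci, nullptr, &messenger);
    }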
| 75 | Common::DynamicLibrary OpenVulkanLibrary() { | ||
| 76 | Common::DynamicLibrary library; | ||
| 77 | #ifdef __APPLE__ | ||
| 78 | // Check if a path to a specific Vulkan library has been specified. | ||
| 79 | char* libvulkan_env = getenv("LIBVULKAN_PATH"); | ||
| 80 | if (!libvulkan_env || !library.Open(libvulkan_env)) { | ||
| 81 | // Use the libvulkan.dylib from the application bundle. | ||
| 82 | std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; | ||
| 83 | library.Open(filename.c_str()); | ||
| 84 | } | ||
| 85 | #else | ||
| 86 | std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); | ||
| 87 | if (!library.Open(filename.c_str())) { | ||
| 88 | // Android devices may not have libvulkan.so.1, only libvulkan.so. | ||
| 89 | filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); | ||
| 90 | library.Open(filename.c_str()); | ||
| 91 | } | ||
| 92 | #endif | ||
| 93 | return library; | ||
| 94 | } | ||
| 95 | |||
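Note: OpenVulkanLibrary only locates and opens the loader; resolving symbols is left to the caller. A minimal consumption sketch using the same Common::DynamicLibrary API exercised just below (the fixed filename here is hypothetical; the real code derives it per platform):

    void LoadVulkanEntryPoint() {
        Common::DynamicLibrary library;
        if (!library.Open("libvulkan.so.1")) { // hypothetical fixed name
            return;
        }
        PFN_vkGetInstanceProcAddr gipa = nullptr;
        if (library.GetSymbol("vkGetInstanceProcAddr", &gipa)) {
            // gipa can now bootstrap every other Vulkan entry point.
        }
    }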
| 96 | vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatch& dld, | ||
| 97 | WindowSystemType window_type = WindowSystemType::Headless, | ||
| 98 | bool enable_layers = false) { | ||
| 99 | if (!library.IsOpen()) { | ||
| 100 | LOG_ERROR(Render_Vulkan, "Vulkan library not available"); | ||
| 101 | return {}; | ||
| 102 | } | ||
| 103 | if (!library.GetSymbol("vkGetInstanceProcAddr", &dld.vkGetInstanceProcAddr)) { | ||
| 104 | LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); | ||
| 105 | return {}; | ||
| 106 | } | ||
| 107 | if (!vk::Load(dld)) { | ||
| 108 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); | ||
| 109 | return {}; | ||
| 110 | } | ||
| 111 | |||
| 112 | std::vector<const char*> extensions; | ||
| 113 | extensions.reserve(6); | ||
| 114 | switch (window_type) { | ||
| 115 | case Core::Frontend::WindowSystemType::Headless: | ||
| 116 | break; | ||
| 117 | #ifdef _WIN32 | ||
| 118 | case Core::Frontend::WindowSystemType::Windows: | ||
| 119 | extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); | ||
| 120 | break; | ||
| 121 | #endif | ||
| 122 | #ifdef __linux__ | ||
| 123 | case Core::Frontend::WindowSystemType::X11: | ||
| 124 | extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); | ||
| 125 | break; | ||
| 126 | case Core::Frontend::WindowSystemType::Wayland: | ||
| 127 | extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); | ||
| 128 | break; | ||
| 129 | #endif | ||
| 130 | default: | ||
| 131 | LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | if (window_type != Core::Frontend::WindowSystemType::Headless) { | ||
| 135 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); | ||
| 136 | } | ||
| 137 | if (enable_layers) { | ||
| 138 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | ||
| 139 | } | ||
| 140 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); | ||
| 141 | |||
| 142 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 143 | if (!properties) { | ||
| 144 | LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); | ||
| 145 | return {}; | ||
| 146 | } | ||
| 147 | |||
| 148 | for (const char* extension : extensions) { | ||
| 149 | const auto it = | ||
| 150 | std::find_if(properties->begin(), properties->end(), [extension](const auto& prop) { | ||
| 151 | return !std::strcmp(extension, prop.extensionName); | ||
| 152 | }); | ||
| 153 | if (it == properties->end()) { | ||
| 154 | LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); | ||
| 155 | return {}; | ||
| 156 | } | ||
| 157 | } | ||
| 158 | |||
| 159 | static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; | ||
| 160 | vk::Span<const char*> layers = layers_data; | ||
| 161 | if (!enable_layers) { | ||
| 162 | layers = {}; | ||
| 163 | } | ||
| 164 | vk::Instance instance = vk::Instance::Create(layers, extensions, dld); | ||
| 165 | if (!instance) { | ||
| 166 | LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); | ||
| 167 | return {}; | ||
| 168 | } | ||
| 169 | if (!vk::Load(*instance, dld)) { | ||
| 170 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); | ||
| 171 | } | ||
| 172 | return instance; | ||
| 173 | } | ||
| 174 | |||
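Note: CreateInstance encodes Vulkan's staged loading: first the loader-level vkGetInstanceProcAddr, then vk::Load(dld) for global commands, then vk::Load(*instance, dld) for instance-level commands. The raw-API shape of those stages, for orientation; `gipa` and `instance` stand in for values obtained as above:

    // Global commands resolve against a null instance...
    const auto create_instance = reinterpret_cast<PFN_vkCreateInstance>(
        gipa(nullptr, "vkCreateInstance"));
    // ...instance-level commands only resolve once a VkInstance exists,
    // which is what vk::Load(*instance, dld) does for the whole table.
    const auto destroy_instance = reinterpret_cast<PFN_vkDestroyInstance>(
        gipa(instance, "vkDestroyInstance"));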
| 56 | std::string GetReadableVersion(u32 version) { | 175 | std::string GetReadableVersion(u32 version) { |
| 57 | return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), | 176 | return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), |
| 58 | VK_VERSION_PATCH(version)); | 177 | VK_VERSION_PATCH(version)); |
| @@ -63,14 +182,14 @@ std::string GetDriverVersion(const VKDevice& device) { | |||
| 63 | // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 | 182 | // https://github.com/SaschaWillems/vulkan.gpuinfo.org/blob/5dddea46ea1120b0df14eef8f15ff8e318e35462/functions.php#L308-L314 |
| 64 | const u32 version = device.GetDriverVersion(); | 183 | const u32 version = device.GetDriverVersion(); |
| 65 | 184 | ||
| 66 | if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { | 185 | if (device.GetDriverID() == VK_DRIVER_ID_NVIDIA_PROPRIETARY_KHR) { |
| 67 | const u32 major = (version >> 22) & 0x3ff; | 186 | const u32 major = (version >> 22) & 0x3ff; |
| 68 | const u32 minor = (version >> 14) & 0x0ff; | 187 | const u32 minor = (version >> 14) & 0x0ff; |
| 69 | const u32 secondary = (version >> 6) & 0x0ff; | 188 | const u32 secondary = (version >> 6) & 0x0ff; |
| 70 | const u32 tertiary = version & 0x003f; | 189 | const u32 tertiary = version & 0x003f; |
| 71 | return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary); | 190 | return fmt::format("{}.{}.{}.{}", major, minor, secondary, tertiary); |
| 72 | } | 191 | } |
| 73 | if (device.GetDriverID() == vk::DriverIdKHR::eIntelProprietaryWindows) { | 192 | if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { |
| 74 | const u32 major = version >> 14; | 193 | const u32 major = version >> 14; |
| 75 | const u32 minor = version & 0x3fff; | 194 | const u32 minor = version & 0x3fff; |
| 76 | return fmt::format("{}.{}", major, minor); | 195 | return fmt::format("{}.{}", major, minor); |
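Note: the NVIDIA branch above unpacks four bit fields from a single u32. A compile-time self-check with a made-up raw value (not taken from any real driver) confirms the shifts and masks:

    // 441.12 encoded with NVIDIA's 10.8.8.6 bit layout.
    constexpr u32 encoded = (441u << 22) | (12u << 14);
    static_assert(((encoded >> 22) & 0x3ff) == 441);
    static_assert(((encoded >> 14) & 0x0ff) == 12);
    static_assert(((encoded >> 6) & 0x0ff) == 0);
    static_assert((encoded & 0x003f) == 0);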
| @@ -147,27 +266,12 @@ bool RendererVulkan::TryPresent(int /*timeout_ms*/) { | |||
| 147 | } | 266 | } |
| 148 | 267 | ||
| 149 | bool RendererVulkan::Init() { | 268 | bool RendererVulkan::Init() { |
| 150 | PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; | 269 | library = OpenVulkanLibrary(); |
| 151 | render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); | 270 | instance = CreateInstance(library, dld, render_window.GetWindowInfo().type, |
| 152 | const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr); | 271 | Settings::values.renderer_debug); |
| 153 | 272 | if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { | |
| 154 | std::optional<vk::DebugUtilsMessengerEXT> callback; | ||
| 155 | if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) { | ||
| 156 | callback = CreateDebugCallback(dldi); | ||
| 157 | if (!callback) { | ||
| 158 | return false; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | |||
| 162 | if (!PickDevices(dldi)) { | ||
| 163 | if (callback) { | ||
| 164 | instance.destroy(*callback, nullptr, dldi); | ||
| 165 | } | ||
| 166 | return false; | 273 | return false; |
| 167 | } | 274 | } |
| 168 | debug_callback = UniqueDebugUtilsMessengerEXT( | ||
| 169 | *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>( | ||
| 170 | instance, nullptr, device->GetDispatchLoader())); | ||
| 171 | 275 | ||
| 172 | Report(); | 276 | Report(); |
| 173 | 277 | ||
| @@ -176,7 +280,7 @@ bool RendererVulkan::Init() { | |||
| 176 | resource_manager = std::make_unique<VKResourceManager>(*device); | 280 | resource_manager = std::make_unique<VKResourceManager>(*device); |
| 177 | 281 | ||
| 178 | const auto& framebuffer = render_window.GetFramebufferLayout(); | 282 | const auto& framebuffer = render_window.GetFramebufferLayout(); |
| 179 | swapchain = std::make_unique<VKSwapchain>(surface, *device); | 283 | swapchain = std::make_unique<VKSwapchain>(*surface, *device); |
| 180 | swapchain->Create(framebuffer.width, framebuffer.height, false); | 284 | swapchain->Create(framebuffer.width, framebuffer.height, false); |
| 181 | 285 | ||
| 182 | state_tracker = std::make_unique<StateTracker>(system); | 286 | state_tracker = std::make_unique<StateTracker>(system); |
| @@ -198,10 +302,8 @@ void RendererVulkan::ShutDown() { | |||
| 198 | if (!device) { | 302 | if (!device) { |
| 199 | return; | 303 | return; |
| 200 | } | 304 | } |
| 201 | const auto dev = device->GetLogical(); | 305 | if (const auto& dev = device->GetLogical()) { |
| 202 | const auto& dld = device->GetDispatchLoader(); | 306 | dev.WaitIdle(); |
| 203 | if (dev && dld.vkDeviceWaitIdle) { | ||
| 204 | dev.waitIdle(dld); | ||
| 205 | } | 307 | } |
| 206 | 308 | ||
| 207 | rasterizer.reset(); | 309 | rasterizer.reset(); |
| @@ -213,44 +315,94 @@ void RendererVulkan::ShutDown() { | |||
| 213 | device.reset(); | 315 | device.reset(); |
| 214 | } | 316 | } |
| 215 | 317 | ||
| 216 | std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( | 318 | bool RendererVulkan::CreateDebugCallback() { |
| 217 | const vk::DispatchLoaderDynamic& dldi) { | 319 | if (!Settings::values.renderer_debug) { |
| 218 | const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( | 320 | return true; |
| 219 | {}, | 321 | } |
| 220 | vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | | 322 | debug_callback = instance.TryCreateDebugCallback(DebugCallback); |
| 221 | vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning | | 323 | if (!debug_callback) { |
| 222 | vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | | ||
| 223 | vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose, | ||
| 224 | vk::DebugUtilsMessageTypeFlagBitsEXT::eGeneral | | ||
| 225 | vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | | ||
| 226 | vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, | ||
| 227 | &DebugCallback, nullptr); | ||
| 228 | vk::DebugUtilsMessengerEXT callback; | ||
| 229 | if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) != | ||
| 230 | vk::Result::eSuccess) { | ||
| 231 | LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); | 324 | LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); |
| 232 | return {}; | 325 | return false; |
| 233 | } | 326 | } |
| 234 | return callback; | 327 | return true; |
| 235 | } | 328 | } |
| 236 | 329 | ||
| 237 | bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) { | 330 | bool RendererVulkan::CreateSurface() { |
| 238 | const auto devices = instance.enumeratePhysicalDevices(dldi); | 331 | [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo(); |
| 332 | VkSurfaceKHR unsafe_surface = nullptr; | ||
| 333 | |||
| 334 | #ifdef _WIN32 | ||
| 335 | if (window_info.type == Core::Frontend::WindowSystemType::Windows) { | ||
| 336 | const HWND hWnd = static_cast<HWND>(window_info.render_surface); | ||
| 337 | const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, | ||
| 338 | nullptr, 0, nullptr, hWnd}; | ||
| 339 | const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( | ||
| 340 | dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); | ||
| 341 | if (!vkCreateWin32SurfaceKHR || | ||
| 342 | vkCreateWin32SurfaceKHR(*instance, &win32_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { | ||
| 343 | LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); | ||
| 344 | return false; | ||
| 345 | } | ||
| 346 | } | ||
| 347 | #endif | ||
| 348 | #ifdef __linux__ | ||
| 349 | if (window_info.type == Core::Frontend::WindowSystemType::X11) { | ||
| 350 | const VkXlibSurfaceCreateInfoKHR xlib_ci{ | ||
| 351 | VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, | ||
| 352 | static_cast<Display*>(window_info.display_connection), | ||
| 353 | reinterpret_cast<Window>(window_info.render_surface)}; | ||
| 354 | const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( | ||
| 355 | dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); | ||
| 356 | if (!vkCreateXlibSurfaceKHR || | ||
| 357 | vkCreateXlibSurfaceKHR(*instance, &xlib_ci, nullptr, &unsafe_surface) != VK_SUCCESS) { | ||
| 358 | LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); | ||
| 359 | return false; | ||
| 360 | } | ||
| 361 | } | ||
| 362 | if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { | ||
| 363 | const VkWaylandSurfaceCreateInfoKHR wayland_ci{ | ||
| 364 | VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, | ||
| 365 | static_cast<wl_display*>(window_info.display_connection), | ||
| 366 | static_cast<wl_surface*>(window_info.render_surface)}; | ||
| 367 | const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( | ||
| 368 | dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); | ||
| 369 | if (!vkCreateWaylandSurfaceKHR || | ||
| 370 | vkCreateWaylandSurfaceKHR(*instance, &wayland_ci, nullptr, &unsafe_surface) != | ||
| 371 | VK_SUCCESS) { | ||
| 372 | LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); | ||
| 373 | return false; | ||
| 374 | } | ||
| 375 | } | ||
| 376 | #endif | ||
| 377 | if (!unsafe_surface) { | ||
| 378 | LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); | ||
| 379 | return false; | ||
| 380 | } | ||
| 381 | |||
| 382 | surface = vk::SurfaceKHR(unsafe_surface, *instance, dld); | ||
| 383 | return true; | ||
| 384 | } | ||
| 385 | |||
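Note: CreateSurface stops at producing a VkSurfaceKHR; whether a given queue family can actually present to it is verified later during device selection (VKDevice::IsSuitable receives the surface). The raw query underneath that check, sketched with hypothetical handles:

    // Presentation support is a per-queue-family property of the surface.
    VkBool32 supported = VK_FALSE;
    vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index,
                                         surface_handle, &supported);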
| 386 | bool RendererVulkan::PickDevices() { | ||
| 387 | const auto devices = instance.EnumeratePhysicalDevices(); | ||
| 388 | if (!devices) { | ||
| 389 | LOG_ERROR(Render_Vulkan, "Failed to enumerate physical devices"); | ||
| 390 | return false; | ||
| 391 | } | ||
| 239 | 392 | ||
| 240 | // TODO(Rodrigo): Choose device from config file | ||
| 241 | const s32 device_index = Settings::values.vulkan_device; | 393 | const s32 device_index = Settings::values.vulkan_device; |
| 242 | if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { | 394 | if (device_index < 0 || device_index >= static_cast<s32>(devices->size())) { |
| 243 | LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); | 395 | LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); |
| 244 | return false; | 396 | return false; |
| 245 | } | 397 | } |
| 246 | const vk::PhysicalDevice physical_device = devices[device_index]; | 398 | const vk::PhysicalDevice physical_device((*devices)[static_cast<std::size_t>(device_index)], |
| 247 | 399 | dld); | |
| 248 | if (!VKDevice::IsSuitable(dldi, physical_device, surface)) { | 400 | if (!VKDevice::IsSuitable(physical_device, *surface)) { |
| 249 | return false; | 401 | return false; |
| 250 | } | 402 | } |
| 251 | 403 | ||
| 252 | device = std::make_unique<VKDevice>(dldi, physical_device, surface); | 404 | device = std::make_unique<VKDevice>(*instance, physical_device, *surface, dld); |
| 253 | return device->Create(dldi, instance); | 405 | return device->Create(); |
| 254 | } | 406 | } |
| 255 | 407 | ||
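Note: PickDevices trusts the configured index after bounds-checking it. A common alternative, sketched only as an illustration (this change keeps index-based selection), is to scan the enumerated handles for a discrete GPU, using the same wrapper calls seen in EnumerateDevices below:

    for (const VkPhysicalDevice raw : *devices) {
        const vk::PhysicalDevice candidate(raw, dld);
        if (candidate.GetProperties().deviceType ==
            VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) {
            // Prefer this candidate.
            break;
        }
    }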
| 256 | void RendererVulkan::Report() const { | 408 | void RendererVulkan::Report() const { |
| @@ -276,4 +428,25 @@ void RendererVulkan::Report() const { | |||
| 276 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); | 428 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); |
| 277 | } | 429 | } |
| 278 | 430 | ||
| 431 | std::vector<std::string> RendererVulkan::EnumerateDevices() { | ||
| 432 | vk::InstanceDispatch dld; | ||
| 433 | Common::DynamicLibrary library = OpenVulkanLibrary(); | ||
| 434 | vk::Instance instance = CreateInstance(library, dld); | ||
| 435 | if (!instance) { | ||
| 436 | return {}; | ||
| 437 | } | ||
| 438 | |||
| 439 | const std::optional physical_devices = instance.EnumeratePhysicalDevices(); | ||
| 440 | if (!physical_devices) { | ||
| 441 | return {}; | ||
| 442 | } | ||
| 443 | |||
| 444 | std::vector<std::string> names; | ||
| 445 | names.reserve(physical_devices->size()); | ||
| 446 | for (const auto& device : *physical_devices) { | ||
| 447 | names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); | ||
| 448 | } | ||
| 449 | return names; | ||
| 450 | } | ||
| 451 | |||
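Note: EnumerateDevices is static on purpose: it spins up a throwaway headless instance so a frontend can list adapters before any renderer exists. A hypothetical caller, not part of this change:

    const std::vector<std::string> gpus = Vulkan::RendererVulkan::EnumerateDevices();
    for (std::size_t i = 0; i < gpus.size(); ++i) {
        LOG_INFO(Render_Vulkan, "Vulkan device {}: {}", i, gpus[i]);
    }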
| 279 | } // namespace Vulkan | 452 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index d14384e79..18270909b 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -6,10 +6,13 @@ | |||
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <string> | ||
| 9 | #include <vector> | 10 | #include <vector> |
| 10 | 11 | ||
| 12 | #include "common/dynamic_library.h" | ||
| 13 | |||
| 11 | #include "video_core/renderer_base.h" | 14 | #include "video_core/renderer_base.h" |
| 12 | #include "video_core/renderer_vulkan/declarations.h" | 15 | #include "video_core/renderer_vulkan/wrapper.h" |
| 13 | 16 | ||
| 14 | namespace Core { | 17 | namespace Core { |
| 15 | class System; | 18 | class System; |
| @@ -44,22 +47,28 @@ public: | |||
| 44 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | 47 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 45 | bool TryPresent(int timeout_ms) override; | 48 | bool TryPresent(int timeout_ms) override; |
| 46 | 49 | ||
| 50 | static std::vector<std::string> EnumerateDevices(); | ||
| 51 | |||
| 47 | private: | 52 | private: |
| 48 | std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( | 53 | bool CreateDebugCallback(); |
| 49 | const vk::DispatchLoaderDynamic& dldi); | ||
| 50 | 54 | ||
| 51 | bool PickDevices(const vk::DispatchLoaderDynamic& dldi); | 55 | bool CreateSurface(); |
| 56 | |||
| 57 | bool PickDevices(); | ||
| 52 | 58 | ||
| 53 | void Report() const; | 59 | void Report() const; |
| 54 | 60 | ||
| 55 | Core::System& system; | 61 | Core::System& system; |
| 56 | 62 | ||
| 63 | Common::DynamicLibrary library; | ||
| 64 | vk::InstanceDispatch dld; | ||
| 65 | |||
| 57 | vk::Instance instance; | 66 | vk::Instance instance; |
| 58 | vk::SurfaceKHR surface; | 67 | vk::SurfaceKHR surface; |
| 59 | 68 | ||
| 60 | VKScreenInfo screen_info; | 69 | VKScreenInfo screen_info; |
| 61 | 70 | ||
| 62 | UniqueDebugUtilsMessengerEXT debug_callback; | 71 | vk::DebugCallback debug_callback; |
| 63 | std::unique_ptr<VKDevice> device; | 72 | std::unique_ptr<VKDevice> device; |
| 64 | std::unique_ptr<VKSwapchain> swapchain; | 73 | std::unique_ptr<VKSwapchain> swapchain; |
| 65 | std::unique_ptr<VKMemoryManager> memory_manager; | 74 | std::unique_ptr<VKMemoryManager> memory_manager; |
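Note: the declaration order of these members is load-bearing. C++ destroys members in reverse declaration order, so `library` and `dld` (declared first) are destroyed last, guaranteeing the loaded Vulkan module and its dispatch table outlive `instance`, `surface`, `debug_callback`, and everything owned through the unique_ptrs after them. A minimal sketch of the rule, with hypothetical types:

    struct Owner {
        Parent parent; // constructed first, destroyed last
        Child child;   // may depend on parent: constructed last, destroyed first
    };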
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 855cfc883..fbd406f2b 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include "video_core/gpu.h" | 20 | #include "video_core/gpu.h" |
| 21 | #include "video_core/morton.h" | 21 | #include "video_core/morton.h" |
| 22 | #include "video_core/rasterizer_interface.h" | 22 | #include "video_core/rasterizer_interface.h" |
| 23 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 24 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 23 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 25 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 24 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 26 | #include "video_core/renderer_vulkan/vk_device.h" | 25 | #include "video_core/renderer_vulkan/vk_device.h" |
| @@ -30,6 +29,7 @@ | |||
| 30 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 29 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 31 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 30 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| 32 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 31 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 32 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 33 | #include "video_core/surface.h" | 33 | #include "video_core/surface.h" |
| 34 | 34 | ||
| 35 | namespace Vulkan { | 35 | namespace Vulkan { |
| @@ -140,16 +140,25 @@ struct ScreenRectVertex { | |||
| 140 | std::array<f32, 2> position; | 140 | std::array<f32, 2> position; |
| 141 | std::array<f32, 2> tex_coord; | 141 | std::array<f32, 2> tex_coord; |
| 142 | 142 | ||
| 143 | static vk::VertexInputBindingDescription GetDescription() { | 143 | static VkVertexInputBindingDescription GetDescription() { |
| 144 | return vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertex), | 144 | VkVertexInputBindingDescription description; |
| 145 | vk::VertexInputRate::eVertex); | 145 | description.binding = 0; |
| 146 | description.stride = sizeof(ScreenRectVertex); | ||
| 147 | description.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; | ||
| 148 | return description; | ||
| 146 | } | 149 | } |
| 147 | 150 | ||
| 148 | static std::array<vk::VertexInputAttributeDescription, 2> GetAttributes() { | 151 | static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() { |
| 149 | return {vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat, | 152 | std::array<VkVertexInputAttributeDescription, 2> attributes; |
| 150 | offsetof(ScreenRectVertex, position)), | 153 | attributes[0].location = 0; |
| 151 | vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32Sfloat, | 154 | attributes[0].binding = 0; |
| 152 | offsetof(ScreenRectVertex, tex_coord))}; | 155 | attributes[0].format = VK_FORMAT_R32G32_SFLOAT; |
| 156 | attributes[0].offset = offsetof(ScreenRectVertex, position); | ||
| 157 | attributes[1].location = 1; | ||
| 158 | attributes[1].binding = 0; | ||
| 159 | attributes[1].format = VK_FORMAT_R32G32_SFLOAT; | ||
| 160 | attributes[1].offset = offsetof(ScreenRectVertex, tex_coord); | ||
| 161 | return attributes; | ||
| 153 | } | 162 | } |
| 154 | }; | 163 | }; |
| 155 | 164 | ||
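Note: the field-by-field fills above leave no struct member accidentally uninitialized, at the cost of verbosity. The same binding description with C++20 designated initializers, which zero every unnamed member automatically; this sketch assumes a later language standard than the change itself targets:

    VkVertexInputBindingDescription description{
        .binding = 0,
        .stride = sizeof(ScreenRectVertex),
        .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
    };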
| @@ -172,16 +181,16 @@ std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { | |||
| 172 | static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer); | 181 | static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer); |
| 173 | } | 182 | } |
| 174 | 183 | ||
| 175 | vk::Format GetFormat(const Tegra::FramebufferConfig& framebuffer) { | 184 | VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { |
| 176 | switch (framebuffer.pixel_format) { | 185 | switch (framebuffer.pixel_format) { |
| 177 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: | 186 | case Tegra::FramebufferConfig::PixelFormat::ABGR8: |
| 178 | return vk::Format::eA8B8G8R8UnormPack32; | 187 | return VK_FORMAT_A8B8G8R8_UNORM_PACK32; |
| 179 | case Tegra::FramebufferConfig::PixelFormat::RGB565: | 188 | case Tegra::FramebufferConfig::PixelFormat::RGB565: |
| 180 | return vk::Format::eR5G6B5UnormPack16; | 189 | return VK_FORMAT_R5G6B5_UNORM_PACK16; |
| 181 | default: | 190 | default: |
| 182 | UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", | 191 | UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", |
| 183 | static_cast<u32>(framebuffer.pixel_format)); | 192 | static_cast<u32>(framebuffer.pixel_format)); |
| 184 | return vk::Format::eA8B8G8R8UnormPack32; | 193 | return VK_FORMAT_A8B8G8R8_UNORM_PACK32; |
| 185 | } | 194 | } |
| 186 | } | 195 | } |
| 187 | 196 | ||
| @@ -219,8 +228,8 @@ void VKBlitScreen::Recreate() { | |||
| 219 | CreateDynamicResources(); | 228 | CreateDynamicResources(); |
| 220 | } | 229 | } |
| 221 | 230 | ||
| 222 | std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | 231 | std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, |
| 223 | bool use_accelerated) { | 232 | bool use_accelerated) { |
| 224 | RefreshResources(framebuffer); | 233 | RefreshResources(framebuffer); |
| 225 | 234 | ||
| 226 | // Finish any pending renderpass | 235 | // Finish any pending renderpass |
| @@ -255,46 +264,76 @@ std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferC | |||
| 255 | framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, | 264 | framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, |
| 256 | map.GetAddress() + image_offset, host_ptr); | 265 | map.GetAddress() + image_offset, host_ptr); |
| 257 | 266 | ||
| 258 | blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eTransfer, | 267 | blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 259 | vk::AccessFlagBits::eTransferWrite, | 268 | VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); |
| 260 | vk::ImageLayout::eTransferDstOptimal); | 269 | |
| 261 | 270 | VkBufferImageCopy copy; | |
| 262 | const vk::BufferImageCopy copy(image_offset, 0, 0, | 271 | copy.bufferOffset = image_offset; |
| 263 | {vk::ImageAspectFlagBits::eColor, 0, 0, 1}, {0, 0, 0}, | 272 | copy.bufferRowLength = 0; |
| 264 | {framebuffer.width, framebuffer.height, 1}); | 273 | copy.bufferImageHeight = 0; |
| 265 | scheduler.Record([buffer_handle = *buffer, image = blit_image->GetHandle(), | 274 | copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; |
| 266 | copy](auto cmdbuf, auto& dld) { | 275 | copy.imageSubresource.mipLevel = 0; |
| 267 | cmdbuf.copyBufferToImage(buffer_handle, image, vk::ImageLayout::eTransferDstOptimal, | 276 | copy.imageSubresource.baseArrayLayer = 0; |
| 268 | {copy}, dld); | 277 | copy.imageSubresource.layerCount = 1; |
| 269 | }); | 278 | copy.imageOffset.x = 0; |
| 279 | copy.imageOffset.y = 0; | ||
| 280 | copy.imageOffset.z = 0; | ||
| 281 | copy.imageExtent.width = framebuffer.width; | ||
| 282 | copy.imageExtent.height = framebuffer.height; | ||
| 283 | copy.imageExtent.depth = 1; | ||
| 284 | scheduler.Record( | ||
| 285 | [buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) { | ||
| 286 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); | ||
| 287 | }); | ||
| 270 | } | 288 | } |
| 271 | map.Release(); | 289 | map.Release(); |
| 272 | 290 | ||
| 273 | blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eFragmentShader, | 291 | blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, |
| 274 | vk::AccessFlagBits::eShaderRead, | 292 | VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); |
| 275 | vk::ImageLayout::eShaderReadOnlyOptimal); | ||
| 276 | 293 | ||
| 277 | scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], | 294 | scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], |
| 278 | descriptor_set = descriptor_sets[image_index], buffer = *buffer, | 295 | descriptor_set = descriptor_sets[image_index], buffer = *buffer, |
| 279 | size = swapchain.GetSize(), pipeline = *pipeline, | 296 | size = swapchain.GetSize(), pipeline = *pipeline, |
| 280 | layout = *pipeline_layout](auto cmdbuf, auto& dld) { | 297 | layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { |
| 281 | const vk::ClearValue clear_color{std::array{0.0f, 0.0f, 0.0f, 1.0f}}; | 298 | VkClearValue clear_color; |
| 282 | const vk::RenderPassBeginInfo renderpass_bi(renderpass, framebuffer, {{0, 0}, size}, 1, | 299 | clear_color.color.float32[0] = 0.0f; |
| 283 | &clear_color); | 300 | clear_color.color.float32[1] = 0.0f; |
| 284 | 301 | clear_color.color.float32[2] = 0.0f; | |
| 285 | cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); | 302 | clear_color.color.float32[3] = 0.0f; |
| 286 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); | 303 | |
| 287 | cmdbuf.setViewport( | 304 | VkRenderPassBeginInfo renderpass_bi; |
| 288 | 0, | 305 | renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; |
| 289 | {{0.0f, 0.0f, static_cast<f32>(size.width), static_cast<f32>(size.height), 0.0f, 1.0f}}, | 306 | renderpass_bi.pNext = nullptr; |
| 290 | dld); | 307 | renderpass_bi.renderPass = renderpass; |
| 291 | cmdbuf.setScissor(0, {{{0, 0}, size}}, dld); | 308 | renderpass_bi.framebuffer = framebuffer; |
| 292 | 309 | renderpass_bi.renderArea.offset.x = 0; | |
| 293 | cmdbuf.bindVertexBuffers(0, {buffer}, {offsetof(BufferData, vertices)}, dld); | 310 | renderpass_bi.renderArea.offset.y = 0; |
| 294 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, {descriptor_set}, {}, | 311 | renderpass_bi.renderArea.extent = size; |
| 295 | dld); | 312 | renderpass_bi.clearValueCount = 1; |
| 296 | cmdbuf.draw(4, 1, 0, 0, dld); | 313 | renderpass_bi.pClearValues = &clear_color; |
| 297 | cmdbuf.endRenderPass(dld); | 314 | |
| 315 | VkViewport viewport; | ||
| 316 | viewport.x = 0.0f; | ||
| 317 | viewport.y = 0.0f; | ||
| 318 | viewport.width = static_cast<float>(size.width); | ||
| 319 | viewport.height = static_cast<float>(size.height); | ||
| 320 | viewport.minDepth = 0.0f; | ||
| 321 | viewport.maxDepth = 1.0f; | ||
| 322 | |||
| 323 | VkRect2D scissor; | ||
| 324 | scissor.offset.x = 0; | ||
| 325 | scissor.offset.y = 0; | ||
| 326 | scissor.extent = size; | ||
| 327 | |||
| 328 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); | ||
| 329 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); | ||
| 330 | cmdbuf.SetViewport(0, viewport); | ||
| 331 | cmdbuf.SetScissor(0, scissor); | ||
| 332 | |||
| 333 | cmdbuf.BindVertexBuffer(0, buffer, offsetof(BufferData, vertices)); | ||
| 334 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); | ||
| 335 | cmdbuf.Draw(4, 1, 0, 0); | ||
| 336 | cmdbuf.EndRenderPass(); | ||
| 298 | }); | 337 | }); |
| 299 | 338 | ||
| 300 | return {scheduler.GetFence(), *semaphores[image_index]}; | 339 | return {scheduler.GetFence(), *semaphores[image_index]}; |
| @@ -334,165 +373,297 @@ void VKBlitScreen::CreateShaders() { | |||
| 334 | } | 373 | } |
| 335 | 374 | ||
| 336 | void VKBlitScreen::CreateSemaphores() { | 375 | void VKBlitScreen::CreateSemaphores() { |
| 337 | const auto dev = device.GetLogical(); | ||
| 338 | const auto& dld = device.GetDispatchLoader(); | ||
| 339 | |||
| 340 | semaphores.resize(image_count); | 376 | semaphores.resize(image_count); |
| 341 | for (std::size_t i = 0; i < image_count; ++i) { | 377 | std::generate(semaphores.begin(), semaphores.end(), |
| 342 | semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); | 378 | [this] { return device.GetLogical().CreateSemaphore(); }); |
| 343 | } | ||
| 344 | } | 379 | } |
| 345 | 380 | ||
| 346 | void VKBlitScreen::CreateDescriptorPool() { | 381 | void VKBlitScreen::CreateDescriptorPool() { |
| 347 | const std::array<vk::DescriptorPoolSize, 2> pool_sizes{ | 382 | std::array<VkDescriptorPoolSize, 2> pool_sizes; |
| 348 | vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, static_cast<u32>(image_count)}, | 383 | pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; |
| 349 | vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, | 384 | pool_sizes[0].descriptorCount = static_cast<u32>(image_count); |
| 350 | static_cast<u32>(image_count)}}; | 385 | pool_sizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; |
| 351 | const vk::DescriptorPoolCreateInfo pool_ci( | 386 | pool_sizes[1].descriptorCount = static_cast<u32>(image_count); |
| 352 | {}, static_cast<u32>(image_count), static_cast<u32>(pool_sizes.size()), pool_sizes.data()); | 387 | |
| 353 | const auto dev = device.GetLogical(); | 388 | VkDescriptorPoolCreateInfo ci; |
| 354 | descriptor_pool = dev.createDescriptorPoolUnique(pool_ci, nullptr, device.GetDispatchLoader()); | 389 | ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; |
| 390 | ci.pNext = nullptr; | ||
| 391 | ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; | ||
| 392 | ci.maxSets = static_cast<u32>(image_count); | ||
| 393 | ci.poolSizeCount = static_cast<u32>(pool_sizes.size()); | ||
| 394 | ci.pPoolSizes = pool_sizes.data(); | ||
| 395 | descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); | ||
| 355 | } | 396 | } |
| 356 | 397 | ||
| 357 | void VKBlitScreen::CreateRenderPass() { | 398 | void VKBlitScreen::CreateRenderPass() { |
| 358 | const vk::AttachmentDescription color_attachment( | 399 | VkAttachmentDescription color_attachment; |
| 359 | {}, swapchain.GetImageFormat(), vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eClear, | 400 | color_attachment.flags = 0; |
| 360 | vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, | 401 | color_attachment.format = swapchain.GetImageFormat(); |
| 361 | vk::AttachmentStoreOp::eDontCare, vk::ImageLayout::eUndefined, | 402 | color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; |
| 362 | vk::ImageLayout::ePresentSrcKHR); | 403 | color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; |
| 363 | 404 | color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; | |
| 364 | const vk::AttachmentReference color_attachment_ref(0, vk::ImageLayout::eColorAttachmentOptimal); | 405 | color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; |
| 365 | 406 | color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; | |
| 366 | const vk::SubpassDescription subpass_description({}, vk::PipelineBindPoint::eGraphics, 0, | 407 | color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; |
| 367 | nullptr, 1, &color_attachment_ref, nullptr, | 408 | color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; |
| 368 | nullptr, 0, nullptr); | 409 | |
| 369 | 410 | VkAttachmentReference color_attachment_ref; | |
| 370 | const vk::SubpassDependency dependency( | 411 | color_attachment_ref.attachment = 0; |
| 371 | VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eColorAttachmentOutput, | 412 | color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; |
| 372 | vk::PipelineStageFlagBits::eColorAttachmentOutput, {}, | 413 | |
| 373 | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite, {}); | 414 | VkSubpassDescription subpass_description; |
| 374 | 415 | subpass_description.flags = 0; | |
| 375 | const vk::RenderPassCreateInfo renderpass_ci({}, 1, &color_attachment, 1, &subpass_description, | 416 | subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; |
| 376 | 1, &dependency); | 417 | subpass_description.inputAttachmentCount = 0; |
| 377 | 418 | subpass_description.pInputAttachments = nullptr; | |
| 378 | const auto dev = device.GetLogical(); | 419 | subpass_description.colorAttachmentCount = 1; |
| 379 | renderpass = dev.createRenderPassUnique(renderpass_ci, nullptr, device.GetDispatchLoader()); | 420 | subpass_description.pColorAttachments = &color_attachment_ref; |
| 421 | subpass_description.pResolveAttachments = nullptr; | ||
| 422 | subpass_description.pDepthStencilAttachment = nullptr; | ||
| 423 | subpass_description.preserveAttachmentCount = 0; | ||
| 424 | subpass_description.pPreserveAttachments = nullptr; | ||
| 425 | |||
| 426 | VkSubpassDependency dependency; | ||
| 427 | dependency.srcSubpass = VK_SUBPASS_EXTERNAL; | ||
| 428 | dependency.dstSubpass = 0; | ||
| 429 | dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | ||
| 430 | dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; | ||
| 431 | dependency.srcAccessMask = 0; | ||
| 432 | dependency.dstAccessMask = | ||
| 433 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; | ||
| 434 | dependency.dependencyFlags = 0; | ||
| 435 | |||
| 436 | VkRenderPassCreateInfo renderpass_ci; | ||
| 437 | renderpass_ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; | ||
| 438 | renderpass_ci.pNext = nullptr; | ||
| 439 | renderpass_ci.flags = 0; | ||
| 440 | renderpass_ci.attachmentCount = 1; | ||
| 441 | renderpass_ci.pAttachments = &color_attachment; | ||
| 442 | renderpass_ci.subpassCount = 1; | ||
| 443 | renderpass_ci.pSubpasses = &subpass_description; | ||
| 444 | renderpass_ci.dependencyCount = 1; | ||
| 445 | renderpass_ci.pDependencies = &dependency; | ||
| 446 | |||
| 447 | renderpass = device.GetLogical().CreateRenderPass(renderpass_ci); | ||
| 380 | } | 448 | } |
| 381 | 449 | ||
| 382 | void VKBlitScreen::CreateDescriptorSetLayout() { | 450 | void VKBlitScreen::CreateDescriptorSetLayout() { |
| 383 | const std::array<vk::DescriptorSetLayoutBinding, 2> layout_bindings{ | 451 | std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings; |
| 384 | vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eUniformBuffer, 1, | 452 | layout_bindings[0].binding = 0; |
| 385 | vk::ShaderStageFlagBits::eVertex, nullptr), | 453 | layout_bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; |
| 386 | vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eCombinedImageSampler, 1, | 454 | layout_bindings[0].descriptorCount = 1; |
| 387 | vk::ShaderStageFlagBits::eFragment, nullptr)}; | 455 | layout_bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT; |
| 388 | const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci( | 456 | layout_bindings[0].pImmutableSamplers = nullptr; |
| 389 | {}, static_cast<u32>(layout_bindings.size()), layout_bindings.data()); | 457 | layout_bindings[1].binding = 1; |
| 390 | 458 | layout_bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; | |
| 391 | const auto dev = device.GetLogical(); | 459 | layout_bindings[1].descriptorCount = 1; |
| 392 | const auto& dld = device.GetDispatchLoader(); | 460 | layout_bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; |
| 393 | descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld); | 461 | layout_bindings[1].pImmutableSamplers = nullptr; |
| 462 | |||
| 463 | VkDescriptorSetLayoutCreateInfo ci; | ||
| 464 | ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; | ||
| 465 | ci.pNext = nullptr; | ||
| 466 | ci.flags = 0; | ||
| 467 | ci.bindingCount = static_cast<u32>(layout_bindings.size()); | ||
| 468 | ci.pBindings = layout_bindings.data(); | ||
| 469 | |||
| 470 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); | ||
| 394 | } | 471 | } |
| 395 | 472 | ||
| 396 | void VKBlitScreen::CreateDescriptorSets() { | 473 | void VKBlitScreen::CreateDescriptorSets() { |
| 397 | const auto dev = device.GetLogical(); | 474 | const std::vector layouts(image_count, *descriptor_set_layout); |
| 398 | const auto& dld = device.GetDispatchLoader(); | 475 | |
| 399 | 476 | VkDescriptorSetAllocateInfo ai; | |
| 400 | descriptor_sets.resize(image_count); | 477 | ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; |
| 401 | for (std::size_t i = 0; i < image_count; ++i) { | 478 | ai.pNext = nullptr; |
| 402 | const vk::DescriptorSetLayout layout = *descriptor_set_layout; | 479 | ai.descriptorPool = *descriptor_pool; |
| 403 | const vk::DescriptorSetAllocateInfo descriptor_set_ai(*descriptor_pool, 1, &layout); | 480 | ai.descriptorSetCount = static_cast<u32>(image_count); |
| 404 | const vk::Result result = | 481 | ai.pSetLayouts = layouts.data(); |
| 405 | dev.allocateDescriptorSets(&descriptor_set_ai, &descriptor_sets[i], dld); | 482 | descriptor_sets = descriptor_pool.Allocate(ai); |
| 406 | ASSERT(result == vk::Result::eSuccess); | ||
| 407 | } | ||
| 408 | } | 483 | } |
| 409 | 484 | ||
| 410 | void VKBlitScreen::CreatePipelineLayout() { | 485 | void VKBlitScreen::CreatePipelineLayout() { |
| 411 | const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &descriptor_set_layout.get(), 0, | 486 | VkPipelineLayoutCreateInfo ci; |
| 412 | nullptr); | 487 | ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; |
| 413 | const auto dev = device.GetLogical(); | 488 | ci.pNext = nullptr; |
| 414 | const auto& dld = device.GetDispatchLoader(); | 489 | ci.flags = 0; |
| 415 | pipeline_layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); | 490 | ci.setLayoutCount = 1; |
| 491 | ci.pSetLayouts = descriptor_set_layout.address(); | ||
| 492 | ci.pushConstantRangeCount = 0; | ||
| 493 | ci.pPushConstantRanges = nullptr; | ||
| 494 | pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); | ||
| 416 | } | 495 | } |
| 417 | 496 | ||
| 418 | void VKBlitScreen::CreateGraphicsPipeline() { | 497 | void VKBlitScreen::CreateGraphicsPipeline() { |
| 419 | const std::array shader_stages = { | 498 | std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages; |
| 420 | vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eVertex, *vertex_shader, | 499 | shader_stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; |
| 421 | "main", nullptr), | 500 | shader_stages[0].pNext = nullptr; |
| 422 | vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eFragment, *fragment_shader, | 501 | shader_stages[0].flags = 0; |
| 423 | "main", nullptr)}; | 502 | shader_stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT; |
| 503 | shader_stages[0].module = *vertex_shader; | ||
| 504 | shader_stages[0].pName = "main"; | ||
| 505 | shader_stages[0].pSpecializationInfo = nullptr; | ||
| 506 | shader_stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; | ||
| 507 | shader_stages[1].pNext = nullptr; | ||
| 508 | shader_stages[1].flags = 0; | ||
| 509 | shader_stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT; | ||
| 510 | shader_stages[1].module = *fragment_shader; | ||
| 511 | shader_stages[1].pName = "main"; | ||
| 512 | shader_stages[1].pSpecializationInfo = nullptr; | ||
| 424 | 513 | ||
| 425 | const auto vertex_binding_description = ScreenRectVertex::GetDescription(); | 514 | const auto vertex_binding_description = ScreenRectVertex::GetDescription(); |
| 426 | const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); | 515 | const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); |
| 427 | const vk::PipelineVertexInputStateCreateInfo vertex_input( | ||
| 428 | {}, 1, &vertex_binding_description, static_cast<u32>(vertex_attrs_description.size()), | ||
| 429 | vertex_attrs_description.data()); | ||
| 430 | |||
| 431 | const vk::PipelineInputAssemblyStateCreateInfo input_assembly( | ||
| 432 | {}, vk::PrimitiveTopology::eTriangleStrip, false); | ||
| 433 | |||
| 434 | // Set a dummy viewport, it's going to be replaced by dynamic states. | ||
| 435 | const vk::Viewport viewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f); | ||
| 436 | const vk::Rect2D scissor({0, 0}, {1, 1}); | ||
| 437 | 516 | ||
| 438 | const vk::PipelineViewportStateCreateInfo viewport_state({}, 1, &viewport, 1, &scissor); | 517 | VkPipelineVertexInputStateCreateInfo vertex_input_ci; |
| 439 | 518 | vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; | |
| 440 | const vk::PipelineRasterizationStateCreateInfo rasterizer( | 519 | vertex_input_ci.pNext = nullptr; |
| 441 | {}, false, false, vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone, | 520 | vertex_input_ci.flags = 0; |
| 442 | vk::FrontFace::eClockwise, false, 0.0f, 0.0f, 0.0f, 1.0f); | 521 | vertex_input_ci.vertexBindingDescriptionCount = 1; |
| 443 | 522 | vertex_input_ci.pVertexBindingDescriptions = &vertex_binding_description; | |
| 444 | const vk::PipelineMultisampleStateCreateInfo multisampling({}, vk::SampleCountFlagBits::e1, | 523 | vertex_input_ci.vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}; |
| 445 | false, 0.0f, nullptr, false, false); | 524 | vertex_input_ci.pVertexAttributeDescriptions = vertex_attrs_description.data(); |
| 446 | 525 | ||
| 447 | const vk::PipelineColorBlendAttachmentState color_blend_attachment( | 526 | VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; |
| 448 | false, vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd, | 527 | input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; |
| 449 | vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd, | 528 | input_assembly_ci.pNext = nullptr; |
| 450 | vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | | 529 | input_assembly_ci.flags = 0; |
| 451 | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); | 530 | input_assembly_ci.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; |
| 452 | 531 | input_assembly_ci.primitiveRestartEnable = VK_FALSE; | |
| 453 | const vk::PipelineColorBlendStateCreateInfo color_blending( | 532 | |
| 454 | {}, false, vk::LogicOp::eCopy, 1, &color_blend_attachment, {0.0f, 0.0f, 0.0f, 0.0f}); | 533 | VkPipelineViewportStateCreateInfo viewport_state_ci; |
| 455 | 534 | viewport_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; | |
| 456 | const std::array<vk::DynamicState, 2> dynamic_states = {vk::DynamicState::eViewport, | 535 | viewport_state_ci.pNext = nullptr; |
| 457 | vk::DynamicState::eScissor}; | 536 | viewport_state_ci.flags = 0; |
| 458 | 537 | viewport_state_ci.viewportCount = 1; | |
| 459 | const vk::PipelineDynamicStateCreateInfo dynamic_state( | 538 | viewport_state_ci.pViewports = nullptr; |
| 460 | {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data()); | 539 | viewport_state_ci.scissorCount = 1; |
| 461 | 540 | viewport_state_ci.pScissors = nullptr; | |
| 462 | const vk::GraphicsPipelineCreateInfo pipeline_ci( | 541 | |
| 463 | {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input, | 542 | VkPipelineRasterizationStateCreateInfo rasterization_ci; |
| 464 | &input_assembly, nullptr, &viewport_state, &rasterizer, &multisampling, nullptr, | 543 | rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; |
| 465 | &color_blending, &dynamic_state, *pipeline_layout, *renderpass, 0, nullptr, 0); | 544 | rasterization_ci.pNext = nullptr; |
| 466 | 545 | rasterization_ci.flags = 0; | |
| 467 | const auto dev = device.GetLogical(); | 546 | rasterization_ci.depthClampEnable = VK_FALSE; |
| 468 | const auto& dld = device.GetDispatchLoader(); | 547 | rasterization_ci.rasterizerDiscardEnable = VK_FALSE; |
| 469 | pipeline = dev.createGraphicsPipelineUnique({}, pipeline_ci, nullptr, dld); | 548 | rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; |
| 549 | rasterization_ci.cullMode = VK_CULL_MODE_NONE; | ||
| 550 | rasterization_ci.frontFace = VK_FRONT_FACE_CLOCKWISE; | ||
| 551 | rasterization_ci.depthBiasEnable = VK_FALSE; | ||
| 552 | rasterization_ci.depthBiasConstantFactor = 0.0f; | ||
| 553 | rasterization_ci.depthBiasClamp = 0.0f; | ||
| 554 | rasterization_ci.depthBiasSlopeFactor = 0.0f; | ||
| 555 | rasterization_ci.lineWidth = 1.0f; | ||
| 556 | |||
| 557 | VkPipelineMultisampleStateCreateInfo multisampling_ci; | ||
| 558 | multisampling_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; | ||
| 559 | multisampling_ci.pNext = nullptr; | ||
| 560 | multisampling_ci.flags = 0; | ||
| 561 | multisampling_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; | ||
| 562 | multisampling_ci.sampleShadingEnable = VK_FALSE; | ||
| 563 | multisampling_ci.minSampleShading = 0.0f; | ||
| 564 | multisampling_ci.pSampleMask = nullptr; | ||
| 565 | multisampling_ci.alphaToCoverageEnable = VK_FALSE; | ||
| 566 | multisampling_ci.alphaToOneEnable = VK_FALSE; | ||
| 567 | |||
| 568 | VkPipelineColorBlendAttachmentState color_blend_attachment; | ||
| 569 | color_blend_attachment.blendEnable = VK_FALSE; | ||
| 570 | color_blend_attachment.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO; | ||
| 571 | color_blend_attachment.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; | ||
| 572 | color_blend_attachment.colorBlendOp = VK_BLEND_OP_ADD; | ||
| 573 | color_blend_attachment.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; | ||
| 574 | color_blend_attachment.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; | ||
| 575 | color_blend_attachment.alphaBlendOp = VK_BLEND_OP_ADD; | ||
| 576 | color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | | ||
| 577 | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; | ||
| 578 | |||
| 579 | VkPipelineColorBlendStateCreateInfo color_blend_ci; | ||
| 580 | color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; | ||
| 581 | color_blend_ci.flags = 0; | ||
| 582 | color_blend_ci.pNext = nullptr; | ||
| 583 | color_blend_ci.logicOpEnable = VK_FALSE; | ||
| 584 | color_blend_ci.logicOp = VK_LOGIC_OP_COPY; | ||
| 585 | color_blend_ci.attachmentCount = 1; | ||
| 586 | color_blend_ci.pAttachments = &color_blend_attachment; | ||
| 587 | color_blend_ci.blendConstants[0] = 0.0f; | ||
| 588 | color_blend_ci.blendConstants[1] = 0.0f; | ||
| 589 | color_blend_ci.blendConstants[2] = 0.0f; | ||
| 590 | color_blend_ci.blendConstants[3] = 0.0f; | ||
| 591 | |||
| 592 | static constexpr std::array dynamic_states = {VK_DYNAMIC_STATE_VIEWPORT, | ||
| 593 | VK_DYNAMIC_STATE_SCISSOR}; | ||
| 594 | VkPipelineDynamicStateCreateInfo dynamic_state_ci; | ||
| 595 | dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; | ||
| 596 | dynamic_state_ci.pNext = nullptr; | ||
| 597 | dynamic_state_ci.flags = 0; | ||
| 598 | dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); | ||
| 599 | dynamic_state_ci.pDynamicStates = dynamic_states.data(); | ||
| 600 | |||
| 601 | VkGraphicsPipelineCreateInfo pipeline_ci; | ||
| 602 | pipeline_ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; | ||
| 603 | pipeline_ci.pNext = nullptr; | ||
| 604 | pipeline_ci.flags = 0; | ||
| 605 | pipeline_ci.stageCount = static_cast<u32>(shader_stages.size()); | ||
| 606 | pipeline_ci.pStages = shader_stages.data(); | ||
| 607 | pipeline_ci.pVertexInputState = &vertex_input_ci; | ||
| 608 | pipeline_ci.pInputAssemblyState = &input_assembly_ci; | ||
| 609 | pipeline_ci.pTessellationState = nullptr; | ||
| 610 | pipeline_ci.pViewportState = &viewport_state_ci; | ||
| 611 | pipeline_ci.pRasterizationState = &rasterization_ci; | ||
| 612 | pipeline_ci.pMultisampleState = &multisampling_ci; | ||
| 613 | pipeline_ci.pDepthStencilState = nullptr; | ||
| 614 | pipeline_ci.pColorBlendState = &color_blend_ci; | ||
| 615 | pipeline_ci.pDynamicState = &dynamic_state_ci; | ||
| 616 | pipeline_ci.layout = *pipeline_layout; | ||
| 617 | pipeline_ci.renderPass = *renderpass; | ||
| 618 | pipeline_ci.subpass = 0; | ||
| 619 | pipeline_ci.basePipelineHandle = 0; | ||
| 620 | pipeline_ci.basePipelineIndex = 0; | ||
| 621 | |||
| 622 | pipeline = device.GetLogical().CreateGraphicsPipeline(pipeline_ci); | ||
| 470 | } | 623 | } |
| 471 | 624 | ||
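Note: because VK_DYNAMIC_STATE_VIEWPORT and VK_DYNAMIC_STATE_SCISSOR are listed, the pipeline's pViewports/pScissors may legitimately be null: the baked values are ignored and the ones recorded per frame in Draw win, so the pipeline survives window resizes. The raw-API pairing, sketched with a hypothetical command buffer:

    vkCmdSetViewport(cmd, 0, 1, &viewport);
    vkCmdSetScissor(cmd, 0, 1, &scissor);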
| 472 | void VKBlitScreen::CreateSampler() { | 625 | void VKBlitScreen::CreateSampler() { |
| 473 | const auto dev = device.GetLogical(); | 626 | VkSamplerCreateInfo ci; |
| 474 | const auto& dld = device.GetDispatchLoader(); | 627 | ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; |
| 475 | const vk::SamplerCreateInfo sampler_ci( | 628 | ci.pNext = nullptr; |
| 476 | {}, vk::Filter::eLinear, vk::Filter::eLinear, vk::SamplerMipmapMode::eLinear, | 629 | ci.flags = 0; |
| 477 | vk::SamplerAddressMode::eClampToBorder, vk::SamplerAddressMode::eClampToBorder, | 630 | ci.magFilter = VK_FILTER_LINEAR; |
| 478 | vk::SamplerAddressMode::eClampToBorder, 0.0f, false, 0.0f, false, vk::CompareOp::eNever, | 631 | ci.minFilter = VK_FILTER_LINEAR; |
| 479 | 0.0f, 0.0f, vk::BorderColor::eFloatOpaqueBlack, false); | 632 | ci.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; |
| 480 | sampler = dev.createSamplerUnique(sampler_ci, nullptr, dld); | 633 | ci.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; |
| 634 | ci.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; | ||
| 635 | ci.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; | ||
| 636 | ci.mipLodBias = 0.0f; | ||
| 637 | ci.anisotropyEnable = VK_FALSE; | ||
| 638 | ci.maxAnisotropy = 0.0f; | ||
| 639 | ci.compareEnable = VK_FALSE; | ||
| 640 | ci.compareOp = VK_COMPARE_OP_NEVER; | ||
| 641 | ci.minLod = 0.0f; | ||
| 642 | ci.maxLod = 0.0f; | ||
| 643 | ci.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 644 | ci.unnormalizedCoordinates = VK_FALSE; | ||
| 645 | |||
| 646 | sampler = device.GetLogical().CreateSampler(ci); | ||
| 481 | } | 647 | } |
| 482 | 648 | ||
| 483 | void VKBlitScreen::CreateFramebuffers() { | 649 | void VKBlitScreen::CreateFramebuffers() { |
| 484 | const vk::Extent2D size{swapchain.GetSize()}; | 650 | const VkExtent2D size{swapchain.GetSize()}; |
| 485 | framebuffers.clear(); | ||
| 486 | framebuffers.resize(image_count); | 651 | framebuffers.resize(image_count); |
| 487 | 652 | ||
| 488 | const auto dev = device.GetLogical(); | 653 | VkFramebufferCreateInfo ci; |
| 489 | const auto& dld = device.GetDispatchLoader(); | 654 | ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; |
| 655 | ci.pNext = nullptr; | ||
| 656 | ci.flags = 0; | ||
| 657 | ci.renderPass = *renderpass; | ||
| 658 | ci.attachmentCount = 1; | ||
| 659 | ci.width = size.width; | ||
| 660 | ci.height = size.height; | ||
| 661 | ci.layers = 1; | ||
| 490 | 662 | ||
| 491 | for (std::size_t i = 0; i < image_count; ++i) { | 663 | for (std::size_t i = 0; i < image_count; ++i) { |
| 492 | const vk::ImageView image_view{swapchain.GetImageViewIndex(i)}; | 664 | const VkImageView image_view{swapchain.GetImageViewIndex(i)}; |
| 493 | const vk::FramebufferCreateInfo framebuffer_ci({}, *renderpass, 1, &image_view, size.width, | 665 | ci.pAttachments = &image_view; |
| 494 | size.height, 1); | 666 | framebuffers[i] = device.GetLogical().CreateFramebuffer(ci); |
| 495 | framebuffers[i] = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); | ||
| 496 | } | 667 | } |
| 497 | } | 668 | } |
| 498 | 669 | ||
| @@ -507,54 +678,86 @@ void VKBlitScreen::ReleaseRawImages() { | |||
| 507 | } | 678 | } |
| 508 | 679 | ||
| 509 | void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { | 680 | void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { |
| 510 | const auto dev = device.GetLogical(); | 681 | VkBufferCreateInfo ci; |
| 511 | const auto& dld = device.GetDispatchLoader(); | 682 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| 512 | 683 | ci.pNext = nullptr; | |
| 513 | const vk::BufferCreateInfo buffer_ci({}, CalculateBufferSize(framebuffer), | 684 | ci.flags = 0; |
| 514 | vk::BufferUsageFlagBits::eTransferSrc | | 685 | ci.size = CalculateBufferSize(framebuffer); |
| 515 | vk::BufferUsageFlagBits::eVertexBuffer | | 686 | ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 516 | vk::BufferUsageFlagBits::eUniformBuffer, | 687 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; |
| 517 | vk::SharingMode::eExclusive, 0, nullptr); | 688 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; |
| 518 | buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); | 689 | ci.queueFamilyIndexCount = 0; |
| 519 | buffer_commit = memory_manager.Commit(*buffer, true); | 690 | ci.pQueueFamilyIndices = nullptr; |
| 691 | |||
| 692 | buffer = device.GetLogical().CreateBuffer(ci); | ||
| 693 | buffer_commit = memory_manager.Commit(buffer, true); | ||
| 520 | } | 694 | } |
| 521 | 695 | ||
| 522 | void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | 696 | void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { |
| 523 | raw_images.resize(image_count); | 697 | raw_images.resize(image_count); |
| 524 | raw_buffer_commits.resize(image_count); | 698 | raw_buffer_commits.resize(image_count); |
| 525 | 699 | ||
| 526 | const auto format = GetFormat(framebuffer); | 700 | VkImageCreateInfo ci; |
| 701 | ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; | ||
| 702 | ci.pNext = nullptr; | ||
| 703 | ci.flags = 0; | ||
| 704 | ci.imageType = VK_IMAGE_TYPE_2D; | ||
| 705 | ci.format = GetFormat(framebuffer); | ||
| 706 | ci.extent.width = framebuffer.width; | ||
| 707 | ci.extent.height = framebuffer.height; | ||
| 708 | ci.extent.depth = 1; | ||
| 709 | ci.mipLevels = 1; | ||
| 710 | ci.arrayLayers = 1; | ||
| 711 | ci.samples = VK_SAMPLE_COUNT_1_BIT; | ||
| 712 | ci.tiling = VK_IMAGE_TILING_LINEAR; | ||
| 713 | ci.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; | ||
| 714 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; | ||
| 715 | ci.queueFamilyIndexCount = 0; | ||
| 716 | ci.pQueueFamilyIndices = nullptr; | ||
| 717 | ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; | ||
| 718 | |||
| 527 | for (std::size_t i = 0; i < image_count; ++i) { | 719 | for (std::size_t i = 0; i < image_count; ++i) { |
| 528 | const vk::ImageCreateInfo image_ci( | 720 | raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT); |
| 529 | {}, vk::ImageType::e2D, format, {framebuffer.width, framebuffer.height, 1}, 1, 1, | ||
| 530 | vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal, | ||
| 531 | vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, | ||
| 532 | vk::SharingMode::eExclusive, 0, nullptr, vk::ImageLayout::eUndefined); | ||
| 533 | |||
| 534 | raw_images[i] = | ||
| 535 | std::make_unique<VKImage>(device, scheduler, image_ci, vk::ImageAspectFlagBits::eColor); | ||
| 536 | raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); | 721 | raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); |
| 537 | } | 722 | } |
| 538 | } | 723 | } |
| 539 | 724 | ||
| 540 | void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const { | 725 | void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { |
| 541 | const vk::DescriptorSet descriptor_set = descriptor_sets[image_index]; | 726 | VkDescriptorBufferInfo buffer_info; |
| 542 | 727 | buffer_info.buffer = *buffer; | |
| 543 | const vk::DescriptorBufferInfo buffer_info(*buffer, offsetof(BufferData, uniform), | 728 | buffer_info.offset = offsetof(BufferData, uniform); |
| 544 | sizeof(BufferData::uniform)); | 729 | buffer_info.range = sizeof(BufferData::uniform); |
| 545 | const vk::WriteDescriptorSet ubo_write(descriptor_set, 0, 0, 1, | 730 | |
| 546 | vk::DescriptorType::eUniformBuffer, nullptr, | 731 | VkWriteDescriptorSet ubo_write; |
| 547 | &buffer_info, nullptr); | 732 | ubo_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; |
| 548 | 733 | ubo_write.pNext = nullptr; | |
| 549 | const vk::DescriptorImageInfo image_info(*sampler, image_view, | 734 | ubo_write.dstSet = descriptor_sets[image_index]; |
| 550 | vk::ImageLayout::eShaderReadOnlyOptimal); | 735 | ubo_write.dstBinding = 0; |
| 551 | const vk::WriteDescriptorSet sampler_write(descriptor_set, 1, 0, 1, | 736 | ubo_write.dstArrayElement = 0; |
| 552 | vk::DescriptorType::eCombinedImageSampler, | 737 | ubo_write.descriptorCount = 1; |
| 553 | &image_info, nullptr, nullptr); | 738 | ubo_write.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; |
| 554 | 739 | ubo_write.pImageInfo = nullptr; | |
| 555 | const auto dev = device.GetLogical(); | 740 | ubo_write.pBufferInfo = &buffer_info; |
| 556 | const auto& dld = device.GetDispatchLoader(); | 741 | ubo_write.pTexelBufferView = nullptr; |
| 557 | dev.updateDescriptorSets({ubo_write, sampler_write}, {}, dld); | 742 | |
| 743 | VkDescriptorImageInfo image_info; | ||
| 744 | image_info.sampler = *sampler; | ||
| 745 | image_info.imageView = image_view; | ||
| 746 | image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; | ||
| 747 | |||
| 748 | VkWriteDescriptorSet sampler_write; | ||
| 749 | sampler_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; | ||
| 750 | sampler_write.pNext = nullptr; | ||
| 751 | sampler_write.dstSet = descriptor_sets[image_index]; | ||
| 752 | sampler_write.dstBinding = 1; | ||
| 753 | sampler_write.dstArrayElement = 0; | ||
| 754 | sampler_write.descriptorCount = 1; | ||
| 755 | sampler_write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; | ||
| 756 | sampler_write.pImageInfo = &image_info; | ||
| 757 | sampler_write.pBufferInfo = nullptr; | ||
| 758 | sampler_write.pTexelBufferView = nullptr; | ||
| 759 | |||
| 760 | device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); | ||
| 558 | } | 761 | } |
| 559 | 762 | ||
| 560 | void VKBlitScreen::SetUniformData(BufferData& data, | 763 | void VKBlitScreen::SetUniformData(BufferData& data, |
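
A note on the struct-filling style adopted throughout these hunks: declaring VkFramebufferCreateInfo ci; leaves the struct uninitialized, so every member, including pNext and flags, must be assigned explicitly, and the new code does exactly that. Below is a minimal sketch of the common alternative, value-initializing first and then setting only the meaningful fields; this is an illustration with a hypothetical helper name, not code from this commit.

#include <vulkan/vulkan.h>

// Value-initialization ({}) zeroes pNext, flags and every other member, so
// only the fields that matter need explicit assignments afterwards.
VkFramebufferCreateInfo MakeFramebufferCreateInfo(VkRenderPass renderpass,
                                                  const VkImageView* attachment,
                                                  VkExtent2D size) {
    VkFramebufferCreateInfo ci{};
    ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
    ci.renderPass = renderpass;
    ci.attachmentCount = 1;
    ci.pAttachments = attachment;
    ci.width = size.width;
    ci.height = size.height;
    ci.layers = 1;
    return ci;
}

With the style the commit chose, a forgotten member is uninitialized memory rather than a harmless zero, so the exhaustive field-by-field assignments seen above are load-bearing.
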
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index ea680b3f5..5eb544aea 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -8,9 +8,9 @@ | |||
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <tuple> | 9 | #include <tuple> |
| 10 | 10 | ||
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 11 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 13 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 12 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 14 | 14 | ||
| 15 | namespace Core { | 15 | namespace Core { |
| 16 | class System; | 16 | class System; |
| @@ -49,8 +49,8 @@ public: | |||
| 49 | 49 | ||
| 50 | void Recreate(); | 50 | void Recreate(); |
| 51 | 51 | ||
| 52 | std::tuple<VKFence&, vk::Semaphore> Draw(const Tegra::FramebufferConfig& framebuffer, | 52 | std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer, |
| 53 | bool use_accelerated); | 53 | bool use_accelerated); |
| 54 | 54 | ||
| 55 | private: | 55 | private: |
| 56 | struct BufferData; | 56 | struct BufferData; |
| @@ -74,7 +74,7 @@ private: | |||
| 74 | void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); | 74 | void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); |
| 75 | void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); | 75 | void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); |
| 76 | 76 | ||
| 77 | void UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const; | 77 | void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; |
| 78 | void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; | 78 | void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; |
| 79 | void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; | 79 | void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; |
| 80 | 80 | ||
| @@ -93,23 +93,23 @@ private: | |||
| 93 | const std::size_t image_count; | 93 | const std::size_t image_count; |
| 94 | const VKScreenInfo& screen_info; | 94 | const VKScreenInfo& screen_info; |
| 95 | 95 | ||
| 96 | UniqueShaderModule vertex_shader; | 96 | vk::ShaderModule vertex_shader; |
| 97 | UniqueShaderModule fragment_shader; | 97 | vk::ShaderModule fragment_shader; |
| 98 | UniqueDescriptorPool descriptor_pool; | 98 | vk::DescriptorPool descriptor_pool; |
| 99 | UniqueDescriptorSetLayout descriptor_set_layout; | 99 | vk::DescriptorSetLayout descriptor_set_layout; |
| 100 | UniquePipelineLayout pipeline_layout; | 100 | vk::PipelineLayout pipeline_layout; |
| 101 | UniquePipeline pipeline; | 101 | vk::Pipeline pipeline; |
| 102 | UniqueRenderPass renderpass; | 102 | vk::RenderPass renderpass; |
| 103 | std::vector<UniqueFramebuffer> framebuffers; | 103 | std::vector<vk::Framebuffer> framebuffers; |
| 104 | std::vector<vk::DescriptorSet> descriptor_sets; | 104 | vk::DescriptorSets descriptor_sets; |
| 105 | UniqueSampler sampler; | 105 | vk::Sampler sampler; |
| 106 | 106 | ||
| 107 | UniqueBuffer buffer; | 107 | vk::Buffer buffer; |
| 108 | VKMemoryCommit buffer_commit; | 108 | VKMemoryCommit buffer_commit; |
| 109 | 109 | ||
| 110 | std::vector<std::unique_ptr<VKFenceWatch>> watches; | 110 | std::vector<std::unique_ptr<VKFenceWatch>> watches; |
| 111 | 111 | ||
| 112 | std::vector<UniqueSemaphore> semaphores; | 112 | std::vector<vk::Semaphore> semaphores; |
| 113 | std::vector<std::unique_ptr<VKImage>> raw_images; | 113 | std::vector<std::unique_ptr<VKImage>> raw_images; |
| 114 | std::vector<VKMemoryCommit> raw_buffer_commits; | 114 | std::vector<VKMemoryCommit> raw_buffer_commits; |
| 115 | u32 raw_width = 0; | 115 | u32 raw_width = 0; |
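
The header hunk above replaces vulkan.hpp's UniqueXxx handles with the new wrapper's vk::Xxx owning types. Below is a minimal sketch of the RAII idea behind such a handle, assuming the usual device-plus-handle layout; the commit's actual wrapper.h presumably dispatches through dynamically loaded entry points rather than calling vkDestroyFramebuffer directly as this sketch does.

#include <utility>
#include <vulkan/vulkan.h>

// Minimal owning handle: movable, non-copyable, destroys the Vulkan object
// when it goes out of scope.
class OwnedFramebuffer {
public:
    OwnedFramebuffer() = default;
    OwnedFramebuffer(VkDevice device_, VkFramebuffer handle_)
        : device{device_}, handle{handle_} {}
    OwnedFramebuffer(const OwnedFramebuffer&) = delete;
    OwnedFramebuffer& operator=(const OwnedFramebuffer&) = delete;
    OwnedFramebuffer(OwnedFramebuffer&& rhs) noexcept
        : device{rhs.device}, handle{std::exchange(rhs.handle, VK_NULL_HANDLE)} {}
    OwnedFramebuffer& operator=(OwnedFramebuffer&& rhs) noexcept {
        Release();
        device = rhs.device;
        handle = std::exchange(rhs.handle, VK_NULL_HANDLE);
        return *this;
    }
    ~OwnedFramebuffer() {
        Release();
    }

    VkFramebuffer operator*() const {
        return handle;
    }

private:
    void Release() {
        if (handle != VK_NULL_HANDLE) {
            vkDestroyFramebuffer(device, handle, nullptr);
        }
    }

    VkDevice device = VK_NULL_HANDLE;
    VkFramebuffer handle = VK_NULL_HANDLE;
};

Being movable but not copyable is what lets framebuffers[i] = device.GetLogical().CreateFramebuffer(ci) in the earlier hunk release any previously held handle and take ownership of the new one.
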
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1ba544943..0d167afbd 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -11,48 +11,50 @@ | |||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/bit_util.h" | 12 | #include "common/bit_util.h" |
| 13 | #include "core/core.h" | 13 | #include "core/core.h" |
| 14 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 14 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 16 | #include "video_core/renderer_vulkan/vk_device.h" | 15 | #include "video_core/renderer_vulkan/vk_device.h" |
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 16 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 17 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| 18 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 19 | 19 | ||
| 20 | namespace Vulkan { | 20 | namespace Vulkan { |
| 21 | 21 | ||
| 22 | namespace { | 22 | namespace { |
| 23 | 23 | ||
| 24 | const auto BufferUsage = | 24 | constexpr VkBufferUsageFlags BUFFER_USAGE = |
| 25 | vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | | 25 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | |
| 26 | vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; | 26 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; |
| 27 | 27 | ||
| 28 | const auto UploadPipelineStage = | 28 | constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = |
| 29 | vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | | 29 | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | |
| 30 | vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | | 30 | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | |
| 31 | vk::PipelineStageFlagBits::eComputeShader; | 31 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; |
| 32 | 32 | ||
| 33 | const auto UploadAccessBarriers = | 33 | constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = |
| 34 | vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | | 34 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | |
| 35 | vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | | 35 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; |
| 36 | vk::AccessFlagBits::eIndexRead; | ||
| 37 | 36 | ||
| 38 | auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { | 37 | std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { |
| 39 | return std::make_unique<VKStreamBuffer>(device, scheduler, BufferUsage); | 38 | return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE); |
| 40 | } | 39 | } |
| 41 | 40 | ||
| 42 | } // Anonymous namespace | 41 | } // Anonymous namespace |
| 43 | 42 | ||
| 44 | CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | 43 | CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, |
| 45 | CacheAddr cache_addr, std::size_t size) | 44 | VAddr cpu_addr, std::size_t size) |
| 46 | : VideoCommon::BufferBlock{cache_addr, size} { | 45 | : VideoCommon::BufferBlock{cpu_addr, size} { |
| 47 | const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), | 46 | VkBufferCreateInfo ci; |
| 48 | BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | | 47 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| 49 | vk::BufferUsageFlagBits::eTransferDst, | 48 | ci.pNext = nullptr; |
| 50 | vk::SharingMode::eExclusive, 0, nullptr); | 49 | ci.flags = 0; |
| 51 | 50 | ci.size = static_cast<VkDeviceSize>(size); | |
| 52 | const auto& dld{device.GetDispatchLoader()}; | 51 | ci.usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; |
| 53 | const auto dev{device.GetLogical()}; | 52 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; |
| 54 | buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); | 53 | ci.queueFamilyIndexCount = 0; |
| 55 | buffer.commit = memory_manager.Commit(*buffer.handle, false); | 54 | ci.pQueueFamilyIndices = nullptr; |
| 55 | |||
| 56 | buffer.handle = device.GetLogical().CreateBuffer(ci); | ||
| 57 | buffer.commit = memory_manager.Commit(buffer.handle, false); | ||
| 56 | } | 58 | } |
| 57 | 59 | ||
| 58 | CachedBufferBlock::~CachedBufferBlock() = default; | 60 | CachedBufferBlock::~CachedBufferBlock() = default; |
| @@ -60,30 +62,30 @@ CachedBufferBlock::~CachedBufferBlock() = default; | |||
| 60 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 62 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 61 | const VKDevice& device, VKMemoryManager& memory_manager, | 63 | const VKDevice& device, VKMemoryManager& memory_manager, |
| 62 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool) | 64 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool) |
| 63 | : VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer>{rasterizer, system, | 65 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, |
| 64 | CreateStreamBuffer(device, | 66 | CreateStreamBuffer(device, |
| 65 | scheduler)}, | 67 | scheduler)}, |
| 66 | device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ | 68 | device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ |
| 67 | staging_pool} {} | 69 | staging_pool} {} |
| 68 | 70 | ||
| 69 | VKBufferCache::~VKBufferCache() = default; | 71 | VKBufferCache::~VKBufferCache() = default; |
| 70 | 72 | ||
| 71 | Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { | 73 | Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 72 | return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); | 74 | return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); |
| 73 | } | 75 | } |
| 74 | 76 | ||
| 75 | const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { | 77 | const VkBuffer* VKBufferCache::ToHandle(const Buffer& buffer) { |
| 76 | return buffer->GetHandle(); | 78 | return buffer->GetHandle(); |
| 77 | } | 79 | } |
| 78 | 80 | ||
| 79 | const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { | 81 | const VkBuffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { |
| 80 | size = std::max(size, std::size_t(4)); | 82 | size = std::max(size, std::size_t(4)); |
| 81 | const auto& empty = staging_pool.GetUnusedBuffer(size, false); | 83 | const auto& empty = staging_pool.GetUnusedBuffer(size, false); |
| 82 | scheduler.RequestOutsideRenderPassOperationContext(); | 84 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 83 | scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { | 85 | scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { |
| 84 | cmdbuf.fillBuffer(buffer, 0, size, 0, dld); | 86 | cmdbuf.FillBuffer(buffer, 0, size, 0); |
| 85 | }); | 87 | }); |
| 86 | return &*empty.handle; | 88 | return empty.handle.address(); |
| 87 | } | 89 | } |
| 88 | 90 | ||
| 89 | void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 91 | void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| @@ -93,14 +95,21 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st | |||
| 93 | 95 | ||
| 94 | scheduler.RequestOutsideRenderPassOperationContext(); | 96 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 95 | scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, | 97 | scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, |
| 96 | size](auto cmdbuf, auto& dld) { | 98 | size](vk::CommandBuffer cmdbuf) { |
| 97 | cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); | 99 | cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); |
| 98 | cmdbuf.pipelineBarrier( | 100 | |
| 99 | vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, | 101 | VkBufferMemoryBarrier barrier; |
| 100 | {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, | 102 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| 101 | VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, | 103 | barrier.pNext = nullptr; |
| 102 | offset, size)}, | 104 | barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; |
| 103 | {}, dld); | 105 | barrier.dstAccessMask = UPLOAD_ACCESS_BARRIERS; |
| 106 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 107 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 108 | barrier.buffer = buffer; | ||
| 109 | barrier.offset = offset; | ||
| 110 | barrier.size = size; | ||
| 111 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | ||
| 112 | barrier, {}); | ||
| 104 | }); | 113 | }); |
| 105 | } | 114 | } |
| 106 | 115 | ||
| @@ -109,16 +118,23 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 109 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); | 118 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); |
| 110 | scheduler.RequestOutsideRenderPassOperationContext(); | 119 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 111 | scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, | 120 | scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, |
| 112 | size](auto cmdbuf, auto& dld) { | 121 | size](vk::CommandBuffer cmdbuf) { |
| 113 | cmdbuf.pipelineBarrier( | 122 | VkBufferMemoryBarrier barrier; |
| 114 | vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | | 123 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| 115 | vk::PipelineStageFlagBits::eComputeShader, | 124 | barrier.pNext = nullptr; |
| 116 | vk::PipelineStageFlagBits::eTransfer, {}, {}, | 125 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; |
| 117 | {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, | 126 | barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; |
| 118 | vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, | 127 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 119 | VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, | 128 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 120 | {}, dld); | 129 | barrier.buffer = buffer; |
| 121 | cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); | 130 | barrier.offset = offset; |
| 131 | barrier.size = size; | ||
| 132 | |||
| 133 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | | ||
| 134 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | ||
| 135 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 136 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); | ||
| 137 | cmdbuf.CopyBuffer(buffer, staging, VkBufferCopy{offset, 0, size}); | ||
| 122 | }); | 138 | }); |
| 123 | scheduler.Finish(); | 139 | scheduler.Finish(); |
| 124 | 140 | ||
| @@ -129,17 +145,30 @@ void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t | |||
| 129 | std::size_t dst_offset, std::size_t size) { | 145 | std::size_t dst_offset, std::size_t size) { |
| 130 | scheduler.RequestOutsideRenderPassOperationContext(); | 146 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 131 | scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, | 147 | scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, |
| 132 | dst_offset, size](auto cmdbuf, auto& dld) { | 148 | dst_offset, size](vk::CommandBuffer cmdbuf) { |
| 133 | cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); | 149 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); |
| 134 | cmdbuf.pipelineBarrier( | 150 | |
| 135 | vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, | 151 | std::array<VkBufferMemoryBarrier, 2> barriers; |
| 136 | {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, | 152 | barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| 137 | vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, | 153 | barriers[0].pNext = nullptr; |
| 138 | VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), | 154 | barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; |
| 139 | vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, | 155 | barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; |
| 140 | VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, | 156 | barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 141 | dst_offset, size)}, | 157 | barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 142 | {}, dld); | 158 | barriers[0].buffer = src_buffer; |
| 159 | barriers[0].offset = src_offset; | ||
| 160 | barriers[0].size = size; | ||
| 161 | barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 162 | barriers[1].pNext = nullptr; | ||
| 163 | barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||
| 164 | barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS; | ||
| 165 | barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 166 | barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 167 | barriers[1].buffer = dst_buffer; | ||
| 168 | barriers[1].offset = dst_offset; | ||
| 169 | barriers[1].size = size; | ||
| 170 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | ||
| 171 | barriers, {}); | ||
| 143 | }); | 172 | }); |
| 144 | } | 173 | } |
| 145 | 174 | ||
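
UploadBlockData, DownloadBlockData and CopyBlock above all fill VkBufferMemoryBarrier with the same shape: ignored queue family indices and a byte range limited to the copied region. A hypothetical helper (not part of the commit) capturing that shape:

#include <vulkan/vulkan.h>

// Builds a buffer barrier over [offset, offset + size) with no queue family
// ownership transfer; the src/dst access masks vary per call site.
VkBufferMemoryBarrier MakeBufferBarrier(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
                                        VkAccessFlags src_access, VkAccessFlags dst_access) {
    VkBufferMemoryBarrier barrier{};
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.srcAccessMask = src_access;
    barrier.dstAccessMask = dst_access;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.buffer = buffer;
    barrier.offset = offset;
    barrier.size = size;
    return barrier;
}

Note the ordering in the hunks: the upload path records its copy first and then a TRANSFER_WRITE to UPLOAD_ACCESS_BARRIERS barrier, while the download path places a SHADER_WRITE to TRANSFER_READ barrier before its copy so earlier shader writes are visible to the transfer.
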
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3f38eed0c..d3c23da98 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -11,11 +11,11 @@ | |||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/buffer_cache/buffer_cache.h" | 12 | #include "video_core/buffer_cache/buffer_cache.h" |
| 13 | #include "video_core/rasterizer_cache.h" | 13 | #include "video_core/rasterizer_cache.h" |
| 14 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 14 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 15 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 17 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 16 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 18 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 17 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| 18 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 19 | 19 | ||
| 20 | namespace Core { | 20 | namespace Core { |
| 21 | class System; | 21 | class System; |
| @@ -30,11 +30,11 @@ class VKScheduler; | |||
| 30 | class CachedBufferBlock final : public VideoCommon::BufferBlock { | 30 | class CachedBufferBlock final : public VideoCommon::BufferBlock { |
| 31 | public: | 31 | public: |
| 32 | explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | 32 | explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, |
| 33 | CacheAddr cache_addr, std::size_t size); | 33 | VAddr cpu_addr, std::size_t size); |
| 34 | ~CachedBufferBlock(); | 34 | ~CachedBufferBlock(); |
| 35 | 35 | ||
| 36 | const vk::Buffer* GetHandle() const { | 36 | const VkBuffer* GetHandle() const { |
| 37 | return &*buffer.handle; | 37 | return buffer.handle.address(); |
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | private: | 40 | private: |
| @@ -43,21 +43,21 @@ private: | |||
| 43 | 43 | ||
| 44 | using Buffer = std::shared_ptr<CachedBufferBlock>; | 44 | using Buffer = std::shared_ptr<CachedBufferBlock>; |
| 45 | 45 | ||
| 46 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, vk::Buffer, VKStreamBuffer> { | 46 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { |
| 47 | public: | 47 | public: |
| 48 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 48 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 49 | const VKDevice& device, VKMemoryManager& memory_manager, | 49 | const VKDevice& device, VKMemoryManager& memory_manager, |
| 50 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool); | 50 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool); |
| 51 | ~VKBufferCache(); | 51 | ~VKBufferCache(); |
| 52 | 52 | ||
| 53 | const vk::Buffer* GetEmptyBuffer(std::size_t size) override; | 53 | const VkBuffer* GetEmptyBuffer(std::size_t size) override; |
| 54 | 54 | ||
| 55 | protected: | 55 | protected: |
| 56 | void WriteBarrier() override {} | 56 | void WriteBarrier() override {} |
| 57 | 57 | ||
| 58 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; | 58 | Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 59 | 59 | ||
| 60 | const vk::Buffer* ToHandle(const Buffer& buffer) override; | 60 | const VkBuffer* ToHandle(const Buffer& buffer) override; |
| 61 | 61 | ||
| 62 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 62 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 63 | const u8* data) override; | 63 | const u8* data) override; |
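
GetHandle() now returns buffer.handle.address() rather than &*buffer.handle. Below is a sketch of why the wrapper can hand out a stable const VkBuffer*; the layout is inferred from this usage, not taken from wrapper.h.

#include <vulkan/vulkan.h>

// The raw handle is a plain data member, so its address stays valid for as
// long as the owning object is alive and has not been moved from.
class BufferHandle {
public:
    explicit BufferHandle(VkBuffer handle_) : handle{handle_} {}

    const VkBuffer* address() const {
        return &handle;
    }

    VkBuffer operator*() const {
        return handle;
    }

private:
    VkBuffer handle = VK_NULL_HANDLE;
};

Since the Buffer alias is a shared_ptr to CachedBufferBlock, the pointer returned by GetHandle() stays valid wherever the block itself is kept alive.
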
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 7bdda3d79..9d92305f4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -10,13 +10,13 @@ | |||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 13 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 15 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 16 | #include "video_core/renderer_vulkan/vk_device.h" | 15 | #include "video_core/renderer_vulkan/vk_device.h" |
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 16 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 17 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 19 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 19 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 20 | 20 | ||
| 21 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | 22 | ||
| @@ -114,6 +114,35 @@ constexpr u8 quad_array[] = { | |||
| 114 | 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, | 114 | 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, |
| 115 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | 115 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; |
| 116 | 116 | ||
| 117 | VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { | ||
| 118 | VkDescriptorSetLayoutBinding binding; | ||
| 119 | binding.binding = 0; | ||
| 120 | binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | ||
| 121 | binding.descriptorCount = 1; | ||
| 122 | binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; | ||
| 123 | binding.pImmutableSamplers = nullptr; | ||
| 124 | return binding; | ||
| 125 | } | ||
| 126 | |||
| 127 | VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { | ||
| 128 | VkDescriptorUpdateTemplateEntryKHR entry; | ||
| 129 | entry.dstBinding = 0; | ||
| 130 | entry.dstArrayElement = 0; | ||
| 131 | entry.descriptorCount = 1; | ||
| 132 | entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | ||
| 133 | entry.offset = 0; | ||
| 134 | entry.stride = sizeof(DescriptorUpdateEntry); | ||
| 135 | return entry; | ||
| 136 | } | ||
| 137 | |||
| 138 | VkPushConstantRange BuildQuadArrayPassPushConstantRange() { | ||
| 139 | VkPushConstantRange range; | ||
| 140 | range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; | ||
| 141 | range.offset = 0; | ||
| 142 | range.size = sizeof(u32); | ||
| 143 | return range; | ||
| 144 | } | ||
| 145 | |||
| 117 | // Uint8 SPIR-V module. Generated from the "shaders/" directory. | 146 | // Uint8 SPIR-V module. Generated from the "shaders/" directory. |
| 118 | constexpr u8 uint8_pass[] = { | 147 | constexpr u8 uint8_pass[] = { |
| 119 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, | 148 | 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00, |
| @@ -191,53 +220,111 @@ constexpr u8 uint8_pass[] = { | |||
| 191 | 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, | 220 | 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, |
| 192 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | 221 | 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; |
| 193 | 222 | ||
| 223 | std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() { | ||
| 224 | std::array<VkDescriptorSetLayoutBinding, 2> bindings; | ||
| 225 | bindings[0].binding = 0; | ||
| 226 | bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | ||
| 227 | bindings[0].descriptorCount = 1; | ||
| 228 | bindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; | ||
| 229 | bindings[0].pImmutableSamplers = nullptr; | ||
| 230 | bindings[1].binding = 1; | ||
| 231 | bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | ||
| 232 | bindings[1].descriptorCount = 1; | ||
| 233 | bindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; | ||
| 234 | bindings[1].pImmutableSamplers = nullptr; | ||
| 235 | return bindings; | ||
| 236 | } | ||
| 237 | |||
| 238 | VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() { | ||
| 239 | VkDescriptorUpdateTemplateEntryKHR entry; | ||
| 240 | entry.dstBinding = 0; | ||
| 241 | entry.dstArrayElement = 0; | ||
| 242 | entry.descriptorCount = 2; | ||
| 243 | entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | ||
| 244 | entry.offset = 0; | ||
| 245 | entry.stride = sizeof(DescriptorUpdateEntry); | ||
| 246 | return entry; | ||
| 247 | } | ||
| 248 | |||
| 194 | } // Anonymous namespace | 249 | } // Anonymous namespace |
| 195 | 250 | ||
| 196 | VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, | 251 | VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, |
| 197 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings, | 252 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 198 | const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, | 253 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 199 | const std::vector<vk::PushConstantRange> push_constants, | 254 | vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, |
| 200 | std::size_t code_size, const u8* code) { | 255 | const u8* code) { |
| 201 | const auto dev = device.GetLogical(); | 256 | VkDescriptorSetLayoutCreateInfo descriptor_layout_ci; |
| 202 | const auto& dld = device.GetDispatchLoader(); | 257 | descriptor_layout_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; |
| 203 | 258 | descriptor_layout_ci.pNext = nullptr; | |
| 204 | const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci( | 259 | descriptor_layout_ci.flags = 0; |
| 205 | {}, static_cast<u32>(bindings.size()), bindings.data()); | 260 | descriptor_layout_ci.bindingCount = bindings.size(); |
| 206 | descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld); | 261 | descriptor_layout_ci.pBindings = bindings.data(); |
| 207 | 262 | descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(descriptor_layout_ci); | |
| 208 | const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, | 263 | |
| 209 | static_cast<u32>(push_constants.size()), | 264 | VkPipelineLayoutCreateInfo pipeline_layout_ci; |
| 210 | push_constants.data()); | 265 | pipeline_layout_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; |
| 211 | layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); | 266 | pipeline_layout_ci.pNext = nullptr; |
| 267 | pipeline_layout_ci.flags = 0; | ||
| 268 | pipeline_layout_ci.setLayoutCount = 1; | ||
| 269 | pipeline_layout_ci.pSetLayouts = descriptor_set_layout.address(); | ||
| 270 | pipeline_layout_ci.pushConstantRangeCount = push_constants.size(); | ||
| 271 | pipeline_layout_ci.pPushConstantRanges = push_constants.data(); | ||
| 272 | layout = device.GetLogical().CreatePipelineLayout(pipeline_layout_ci); | ||
| 212 | 273 | ||
| 213 | if (!templates.empty()) { | 274 | if (!templates.empty()) { |
| 214 | const vk::DescriptorUpdateTemplateCreateInfo template_ci( | 275 | VkDescriptorUpdateTemplateCreateInfoKHR template_ci; |
| 215 | {}, static_cast<u32>(templates.size()), templates.data(), | 276 | template_ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; |
| 216 | vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, | 277 | template_ci.pNext = nullptr; |
| 217 | vk::PipelineBindPoint::eGraphics, *layout, 0); | 278 | template_ci.flags = 0; |
| 218 | descriptor_template = dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); | 279 | template_ci.descriptorUpdateEntryCount = templates.size(); |
| 280 | template_ci.pDescriptorUpdateEntries = templates.data(); | ||
| 281 | template_ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; | ||
| 282 | template_ci.descriptorSetLayout = *descriptor_set_layout; | ||
| 283 | template_ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; | ||
| 284 | template_ci.pipelineLayout = *layout; | ||
| 285 | template_ci.set = 0; | ||
| 286 | descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR(template_ci); | ||
| 219 | 287 | ||
| 220 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); | 288 | descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout); |
| 221 | } | 289 | } |
| 222 | 290 | ||
| 223 | auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); | 291 | auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1); |
| 224 | std::memcpy(code_copy.get(), code, code_size); | 292 | std::memcpy(code_copy.get(), code, code_size); |
| 225 | const vk::ShaderModuleCreateInfo module_ci({}, code_size, code_copy.get()); | ||
| 226 | module = dev.createShaderModuleUnique(module_ci, nullptr, dld); | ||
| 227 | 293 | ||
| 228 | const vk::PipelineShaderStageCreateInfo stage_ci({}, vk::ShaderStageFlagBits::eCompute, *module, | 294 | VkShaderModuleCreateInfo module_ci; |
| 229 | "main", nullptr); | 295 | module_ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; |
| 296 | module_ci.pNext = nullptr; | ||
| 297 | module_ci.flags = 0; | ||
| 298 | module_ci.codeSize = code_size; | ||
| 299 | module_ci.pCode = code_copy.get(); | ||
| 300 | module = device.GetLogical().CreateShaderModule(module_ci); | ||
| 301 | |||
| 302 | VkComputePipelineCreateInfo pipeline_ci; | ||
| 303 | pipeline_ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; | ||
| 304 | pipeline_ci.pNext = nullptr; | ||
| 305 | pipeline_ci.flags = 0; | ||
| 306 | pipeline_ci.layout = *layout; | ||
| 307 | pipeline_ci.basePipelineHandle = nullptr; | ||
| 308 | pipeline_ci.basePipelineIndex = 0; | ||
| 230 | 309 | ||
| 231 | const vk::ComputePipelineCreateInfo pipeline_ci({}, stage_ci, *layout, nullptr, 0); | 310 | VkPipelineShaderStageCreateInfo& stage_ci = pipeline_ci.stage; |
| 232 | pipeline = dev.createComputePipelineUnique(nullptr, pipeline_ci, nullptr, dld); | 311 | stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; |
| 312 | stage_ci.pNext = nullptr; | ||
| 313 | stage_ci.flags = 0; | ||
| 314 | stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; | ||
| 315 | stage_ci.module = *module; | ||
| 316 | stage_ci.pName = "main"; | ||
| 317 | stage_ci.pSpecializationInfo = nullptr; | ||
| 318 | |||
| 319 | pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci); | ||
| 233 | } | 320 | } |
| 234 | 321 | ||
| 235 | VKComputePass::~VKComputePass() = default; | 322 | VKComputePass::~VKComputePass() = default; |
| 236 | 323 | ||
| 237 | vk::DescriptorSet VKComputePass::CommitDescriptorSet( | 324 | VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, |
| 238 | VKUpdateDescriptorQueue& update_descriptor_queue, VKFence& fence) { | 325 | VKFence& fence) { |
| 239 | if (!descriptor_template) { | 326 | if (!descriptor_template) { |
| 240 | return {}; | 327 | return nullptr; |
| 241 | } | 328 | } |
| 242 | const auto set = descriptor_allocator->Commit(fence); | 329 | const auto set = descriptor_allocator->Commit(fence); |
| 243 | update_descriptor_queue.Send(*descriptor_template, set); | 330 | update_descriptor_queue.Send(*descriptor_template, set); |
| @@ -248,25 +335,21 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, | |||
| 248 | VKDescriptorPool& descriptor_pool, | 335 | VKDescriptorPool& descriptor_pool, |
| 249 | VKStagingBufferPool& staging_buffer_pool, | 336 | VKStagingBufferPool& staging_buffer_pool, |
| 250 | VKUpdateDescriptorQueue& update_descriptor_queue) | 337 | VKUpdateDescriptorQueue& update_descriptor_queue) |
| 251 | : VKComputePass(device, descriptor_pool, | 338 | : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), |
| 252 | {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1, | 339 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), |
| 253 | vk::ShaderStageFlagBits::eCompute, nullptr)}, | 340 | BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array), |
| 254 | {vk::DescriptorUpdateTemplateEntry(0, 0, 1, vk::DescriptorType::eStorageBuffer, | ||
| 255 | 0, sizeof(DescriptorUpdateEntry))}, | ||
| 256 | {vk::PushConstantRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(u32))}, | ||
| 257 | std::size(quad_array), quad_array), | ||
| 258 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | 341 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, |
| 259 | update_descriptor_queue{update_descriptor_queue} {} | 342 | update_descriptor_queue{update_descriptor_queue} {} |
| 260 | 343 | ||
| 261 | QuadArrayPass::~QuadArrayPass() = default; | 344 | QuadArrayPass::~QuadArrayPass() = default; |
| 262 | 345 | ||
| 263 | std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { | 346 | std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { |
| 264 | const u32 num_triangle_vertices = num_vertices * 6 / 4; | 347 | const u32 num_triangle_vertices = num_vertices * 6 / 4; |
| 265 | const std::size_t staging_size = num_triangle_vertices * sizeof(u32); | 348 | const std::size_t staging_size = num_triangle_vertices * sizeof(u32); |
| 266 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); | 349 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); |
| 267 | 350 | ||
| 268 | update_descriptor_queue.Acquire(); | 351 | update_descriptor_queue.Acquire(); |
| 269 | update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size); | 352 | update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size); |
| 270 | const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); | 353 | const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); |
| 271 | 354 | ||
| 272 | scheduler.RequestOutsideRenderPassOperationContext(); | 355 | scheduler.RequestOutsideRenderPassOperationContext(); |
| @@ -274,66 +357,72 @@ std::pair<const vk::Buffer&, vk::DeviceSize> QuadArrayPass::Assemble(u32 num_ver | |||
| 274 | ASSERT(num_vertices % 4 == 0); | 357 | ASSERT(num_vertices % 4 == 0); |
| 275 | const u32 num_quads = num_vertices / 4; | 358 | const u32 num_quads = num_vertices / 4; |
| 276 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads, | 359 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, num_quads, |
| 277 | first, set](auto cmdbuf, auto& dld) { | 360 | first, set](vk::CommandBuffer cmdbuf) { |
| 278 | constexpr u32 dispatch_size = 1024; | 361 | constexpr u32 dispatch_size = 1024; |
| 279 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); | 362 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| 280 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); | 363 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |
| 281 | cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eCompute, 0, sizeof(first), &first, | 364 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first); |
| 282 | dld); | 365 | cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1); |
| 283 | cmdbuf.dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1, dld); | 366 | |
| 284 | 367 | VkBufferMemoryBarrier barrier; | |
| 285 | const vk::BufferMemoryBarrier barrier( | 368 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| 286 | vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, | 369 | barrier.pNext = nullptr; |
| 287 | VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, | 370 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; |
| 288 | static_cast<vk::DeviceSize>(num_quads) * 6 * sizeof(u32)); | 371 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; |
| 289 | cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, | 372 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 290 | vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); | 373 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 374 | barrier.buffer = buffer; | ||
| 375 | barrier.offset = 0; | ||
| 376 | barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32); | ||
| 377 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 378 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); | ||
| 291 | }); | 379 | }); |
| 292 | return {*buffer.handle, 0}; | 380 | return {buffer.handle.address(), 0}; |
| 293 | } | 381 | } |
| 294 | 382 | ||
| 295 | Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, | 383 | Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, |
| 296 | VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, | 384 | VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, |
| 297 | VKUpdateDescriptorQueue& update_descriptor_queue) | 385 | VKUpdateDescriptorQueue& update_descriptor_queue) |
| 298 | : VKComputePass(device, descriptor_pool, | 386 | : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(), |
| 299 | {vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eStorageBuffer, 1, | 387 | BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass), |
| 300 | vk::ShaderStageFlagBits::eCompute, nullptr), | 388 | uint8_pass), |
| 301 | vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eStorageBuffer, 1, | ||
| 302 | vk::ShaderStageFlagBits::eCompute, nullptr)}, | ||
| 303 | {vk::DescriptorUpdateTemplateEntry(0, 0, 2, vk::DescriptorType::eStorageBuffer, | ||
| 304 | 0, sizeof(DescriptorUpdateEntry))}, | ||
| 305 | {}, std::size(uint8_pass), uint8_pass), | ||
| 306 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | 389 | scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, |
| 307 | update_descriptor_queue{update_descriptor_queue} {} | 390 | update_descriptor_queue{update_descriptor_queue} {} |
| 308 | 391 | ||
| 309 | Uint8Pass::~Uint8Pass() = default; | 392 | Uint8Pass::~Uint8Pass() = default; |
| 310 | 393 | ||
| 311 | std::pair<const vk::Buffer*, u64> Uint8Pass::Assemble(u32 num_vertices, vk::Buffer src_buffer, | 394 | std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, |
| 312 | u64 src_offset) { | 395 | u64 src_offset) { |
| 313 | const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); | 396 | const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); |
| 314 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); | 397 | auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); |
| 315 | 398 | ||
| 316 | update_descriptor_queue.Acquire(); | 399 | update_descriptor_queue.Acquire(); |
| 317 | update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices); | 400 | update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices); |
| 318 | update_descriptor_queue.AddBuffer(&*buffer.handle, 0, staging_size); | 401 | update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size); |
| 319 | const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); | 402 | const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); |
| 320 | 403 | ||
| 321 | scheduler.RequestOutsideRenderPassOperationContext(); | 404 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 322 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, | 405 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, |
| 323 | num_vertices](auto cmdbuf, auto& dld) { | 406 | num_vertices](vk::CommandBuffer cmdbuf) { |
| 324 | constexpr u32 dispatch_size = 1024; | 407 | constexpr u32 dispatch_size = 1024; |
| 325 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline, dld); | 408 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| 326 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, 0, {set}, {}, dld); | 409 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |
| 327 | cmdbuf.dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1, dld); | 410 | cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); |
| 328 | 411 | ||
| 329 | const vk::BufferMemoryBarrier barrier( | 412 | VkBufferMemoryBarrier barrier; |
| 330 | vk::AccessFlagBits::eShaderWrite, vk::AccessFlagBits::eVertexAttributeRead, | 413 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| 331 | VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, 0, | 414 | barrier.pNext = nullptr; |
| 332 | static_cast<vk::DeviceSize>(num_vertices) * sizeof(u16)); | 415 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; |
| 333 | cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, | 416 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; |
| 334 | vk::PipelineStageFlagBits::eVertexInput, {}, {}, {barrier}, {}, dld); | 417 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; |
| 418 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 419 | barrier.buffer = buffer; | ||
| 420 | barrier.offset = 0; | ||
| 421 | barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); | ||
| 422 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 423 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | ||
| 335 | }); | 424 | }); |
| 336 | return {&*buffer.handle, 0}; | 425 | return {buffer.handle.address(), 0}; |
| 337 | } | 426 | } |
| 338 | 427 | ||
| 339 | } // namespace Vulkan | 428 | } // namespace Vulkan |
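
Both compute passes above dispatch Common::AlignUp(n, dispatch_size) / dispatch_size workgroups, a ceiling division that covers n items with groups of dispatch_size invocations (that the SPIR-V modules use a local size of 1024 is an assumption consistent with the dispatch_size constant in the hunks). A standalone equivalent:

#include <cstdint>

// ceil(num_items / group_size) without floating point: one extra group is
// added whenever num_items is not an exact multiple of group_size.
constexpr std::uint32_t WorkgroupCount(std::uint32_t num_items, std::uint32_t group_size) {
    return (num_items + group_size - 1) / group_size;
}

static_assert(WorkgroupCount(1, 1024) == 1);
static_assert(WorkgroupCount(1024, 1024) == 1);
static_assert(WorkgroupCount(1025, 1024) == 2);
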
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 7057eb837..c62516bff 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -8,8 +8,8 @@ | |||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 11 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| @@ -22,24 +22,24 @@ class VKUpdateDescriptorQueue; | |||
| 22 | class VKComputePass { | 22 | class VKComputePass { |
| 23 | public: | 23 | public: |
| 24 | explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, | 24 | explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool, |
| 25 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings, | 25 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 26 | const std::vector<vk::DescriptorUpdateTemplateEntry>& templates, | 26 | vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates, |
| 27 | const std::vector<vk::PushConstantRange> push_constants, | 27 | vk::Span<VkPushConstantRange> push_constants, std::size_t code_size, |
| 28 | std::size_t code_size, const u8* code); | 28 | const u8* code); |
| 29 | ~VKComputePass(); | 29 | ~VKComputePass(); |
| 30 | 30 | ||
| 31 | protected: | 31 | protected: |
| 32 | vk::DescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, | 32 | VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, |
| 33 | VKFence& fence); | 33 | VKFence& fence); |
| 34 | 34 | ||
| 35 | UniqueDescriptorUpdateTemplate descriptor_template; | 35 | vk::DescriptorUpdateTemplateKHR descriptor_template; |
| 36 | UniquePipelineLayout layout; | 36 | vk::PipelineLayout layout; |
| 37 | UniquePipeline pipeline; | 37 | vk::Pipeline pipeline; |
| 38 | 38 | ||
| 39 | private: | 39 | private: |
| 40 | UniqueDescriptorSetLayout descriptor_set_layout; | 40 | vk::DescriptorSetLayout descriptor_set_layout; |
| 41 | std::optional<DescriptorAllocator> descriptor_allocator; | 41 | std::optional<DescriptorAllocator> descriptor_allocator; |
| 42 | UniqueShaderModule module; | 42 | vk::ShaderModule module; |
| 43 | }; | 43 | }; |
| 44 | 44 | ||
| 45 | class QuadArrayPass final : public VKComputePass { | 45 | class QuadArrayPass final : public VKComputePass { |
| @@ -50,7 +50,7 @@ public: | |||
| 50 | VKUpdateDescriptorQueue& update_descriptor_queue); | 50 | VKUpdateDescriptorQueue& update_descriptor_queue); |
| 51 | ~QuadArrayPass(); | 51 | ~QuadArrayPass(); |
| 52 | 52 | ||
| 53 | std::pair<const vk::Buffer&, vk::DeviceSize> Assemble(u32 num_vertices, u32 first); | 53 | std::pair<const VkBuffer*, VkDeviceSize> Assemble(u32 num_vertices, u32 first); |
| 54 | 54 | ||
| 55 | private: | 55 | private: |
| 56 | VKScheduler& scheduler; | 56 | VKScheduler& scheduler; |
| @@ -65,8 +65,7 @@ public: | |||
| 65 | VKUpdateDescriptorQueue& update_descriptor_queue); | 65 | VKUpdateDescriptorQueue& update_descriptor_queue); |
| 66 | ~Uint8Pass(); | 66 | ~Uint8Pass(); |
| 67 | 67 | ||
| 68 | std::pair<const vk::Buffer*, u64> Assemble(u32 num_vertices, vk::Buffer src_buffer, | 68 | std::pair<const VkBuffer*, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); |
| 69 | u64 src_offset); | ||
| 70 | 69 | ||
| 71 | private: | 70 | private: |
| 72 | VKScheduler& scheduler; | 71 | VKScheduler& scheduler; |
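
The VKComputePass constructor now takes vk::Span<T> where it previously took const std::vector<T>&, so call sites can pass a single struct, an array, or an empty brace list without building temporary vectors. Below is a sketch of that idea under the assumption that vk::Span is a plain pointer/size view; the commit's wrapper.h may differ in detail.

#include <cstddef>
#include <cstdint>
#include <initializer_list>

// Non-owning view over contiguous T. Only safe as a function parameter: the
// view must not outlive whatever it was constructed from.
template <typename T>
class Span {
public:
    constexpr Span() = default;
    constexpr Span(const T& value) : ptr{&value}, num{1} {}
    constexpr Span(std::initializer_list<T> list) : ptr{list.begin()}, num{list.size()} {}
    template <std::size_t N>
    constexpr Span(const T (&array)[N]) : ptr{array}, num{N} {}

    constexpr const T* data() const {
        return ptr;
    }
    constexpr std::uint32_t size() const {
        return static_cast<std::uint32_t>(num);
    }
    constexpr bool empty() const {
        return num == 0;
    }

private:
    const T* ptr = nullptr;
    std::size_t num = 0;
};

With this shape, the QuadArrayPass constructor in vk_compute_pass.cpp can bind the temporaries returned by its Build* helpers for the duration of the base-class constructor call, and Uint8Pass can pass {} for its empty push constant list.
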
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 60f57d83e..23beafa4f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include <memory> | 5 | #include <memory> |
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | 7 | ||
| 8 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 8 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 10 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 9 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | 10 | #include "video_core/renderer_vulkan/vk_device.h" |
| @@ -14,6 +13,7 @@ | |||
| 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 15 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 14 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 16 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 15 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| @@ -30,7 +30,7 @@ VKComputePipeline::VKComputePipeline(const VKDevice& device, VKScheduler& schedu | |||
| 30 | 30 | ||
| 31 | VKComputePipeline::~VKComputePipeline() = default; | 31 | VKComputePipeline::~VKComputePipeline() = default; |
| 32 | 32 | ||
| 33 | vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { | 33 | VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { |
| 34 | if (!descriptor_template) { | 34 | if (!descriptor_template) { |
| 35 | return {}; | 35 | return {}; |
| 36 | } | 36 | } |
| @@ -39,74 +39,109 @@ vk::DescriptorSet VKComputePipeline::CommitDescriptorSet() { | |||
| 39 | return set; | 39 | return set; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | UniqueDescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { | 42 | vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { |
| 43 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | 43 | std::vector<VkDescriptorSetLayoutBinding> bindings; |
| 44 | u32 binding = 0; | 44 | u32 binding = 0; |
| 45 | const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { | 45 | const auto add_bindings = [&](VkDescriptorType descriptor_type, std::size_t num_entries) { |
| 46 | // TODO(Rodrigo): Maybe make individual bindings here? | 46 | // TODO(Rodrigo): Maybe make individual bindings here? |
| 47 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { | 47 | for (u32 bindpoint = 0; bindpoint < static_cast<u32>(num_entries); ++bindpoint) { |
| 48 | bindings.emplace_back(binding++, descriptor_type, 1, vk::ShaderStageFlagBits::eCompute, | 48 | VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); |
| 49 | nullptr); | 49 | entry.binding = binding++; |
| 50 | entry.descriptorType = descriptor_type; | ||
| 51 | entry.descriptorCount = 1; | ||
| 52 | entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; | ||
| 53 | entry.pImmutableSamplers = nullptr; | ||
| 50 | } | 54 | } |
| 51 | }; | 55 | }; |
| 52 | AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); | 56 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); |
| 53 | AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); | 57 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); |
| 54 | AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); | 58 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size()); |
| 55 | AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); | 59 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); |
| 56 | AddBindings(vk::DescriptorType::eStorageImage, entries.images.size()); | 60 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); |
| 57 | 61 | ||
| 58 | const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( | 62 | VkDescriptorSetLayoutCreateInfo ci; |
| 59 | {}, static_cast<u32>(bindings.size()), bindings.data()); | 63 | ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; |
| 60 | 64 | ci.pNext = nullptr; | |
| 61 | const auto dev = device.GetLogical(); | 65 | ci.flags = 0; |
| 62 | const auto& dld = device.GetDispatchLoader(); | 66 | ci.bindingCount = static_cast<u32>(bindings.size()); |
| 63 | return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); | 67 | ci.pBindings = bindings.data(); |
| 68 | return device.GetLogical().CreateDescriptorSetLayout(ci); | ||
| 64 | } | 69 | } |
| 65 | 70 | ||
| 66 | UniquePipelineLayout VKComputePipeline::CreatePipelineLayout() const { | 71 | vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const { |
| 67 | const vk::PipelineLayoutCreateInfo layout_ci({}, 1, &*descriptor_set_layout, 0, nullptr); | 72 | VkPipelineLayoutCreateInfo ci; |
| 68 | const auto dev = device.GetLogical(); | 73 | ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; |
| 69 | return dev.createPipelineLayoutUnique(layout_ci, nullptr, device.GetDispatchLoader()); | 74 | ci.pNext = nullptr; |
| 75 | ci.flags = 0; | ||
| 76 | ci.setLayoutCount = 1; | ||
| 77 | ci.pSetLayouts = descriptor_set_layout.address(); | ||
| 78 | ci.pushConstantRangeCount = 0; | ||
| 79 | ci.pPushConstantRanges = nullptr; | ||
| 80 | return device.GetLogical().CreatePipelineLayout(ci); | ||
| 70 | } | 81 | } |
| 71 | 82 | ||
| 72 | UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate() const { | 83 | vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const { |
| 73 | std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; | 84 | std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries; |
| 74 | u32 binding = 0; | 85 | u32 binding = 0; |
| 75 | u32 offset = 0; | 86 | u32 offset = 0; |
| 76 | FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); | 87 | FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries); |
| 77 | if (template_entries.empty()) { | 88 | if (template_entries.empty()) { |
| 78 | // If the shader doesn't use descriptor sets, skip template creation. | 89 | // If the shader doesn't use descriptor sets, skip template creation. |
| 79 | return UniqueDescriptorUpdateTemplate{}; | 90 | return {}; |
| 80 | } | 91 | } |
| 81 | 92 | ||
| 82 | const vk::DescriptorUpdateTemplateCreateInfo template_ci( | 93 | VkDescriptorUpdateTemplateCreateInfoKHR ci; |
| 83 | {}, static_cast<u32>(template_entries.size()), template_entries.data(), | 94 | ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; |
| 84 | vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, | 95 | ci.pNext = nullptr; |
| 85 | vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); | 96 | ci.flags = 0; |
| 86 | 97 | ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); | |
| 87 | const auto dev = device.GetLogical(); | 98 | ci.pDescriptorUpdateEntries = template_entries.data(); |
| 88 | const auto& dld = device.GetDispatchLoader(); | 99 | ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; |
| 89 | return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); | 100 | ci.descriptorSetLayout = *descriptor_set_layout; |
| 101 | ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; | ||
| 102 | ci.pipelineLayout = *layout; | ||
| 103 | ci.set = DESCRIPTOR_SET; | ||
| 104 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); | ||
| 90 | } | 105 | } |
| 91 | 106 | ||
| 92 | UniqueShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { | 107 | vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const { |
| 93 | const vk::ShaderModuleCreateInfo module_ci({}, code.size() * sizeof(u32), code.data()); | 108 | VkShaderModuleCreateInfo ci; |
| 94 | const auto dev = device.GetLogical(); | 109 | ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; |
| 95 | return dev.createShaderModuleUnique(module_ci, nullptr, device.GetDispatchLoader()); | 110 | ci.pNext = nullptr; |
| 111 | ci.flags = 0; | ||
| 112 | ci.codeSize = code.size() * sizeof(u32); | ||
| 113 | ci.pCode = code.data(); | ||
| 114 | return device.GetLogical().CreateShaderModule(ci); | ||
| 96 | } | 115 | } |
| 97 | 116 | ||
| 98 | UniquePipeline VKComputePipeline::CreatePipeline() const { | 117 | vk::Pipeline VKComputePipeline::CreatePipeline() const { |
| 99 | vk::PipelineShaderStageCreateInfo shader_stage_ci({}, vk::ShaderStageFlagBits::eCompute, | 118 | VkComputePipelineCreateInfo ci; |
| 100 | *shader_module, "main", nullptr); | 119 | VkPipelineShaderStageCreateInfo& stage_ci = ci.stage; |
| 101 | vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; | 120 | stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; |
| 121 | stage_ci.pNext = nullptr; | ||
| 122 | stage_ci.flags = 0; | ||
| 123 | stage_ci.stage = VK_SHADER_STAGE_COMPUTE_BIT; | ||
| 124 | stage_ci.module = *shader_module; | ||
| 125 | stage_ci.pName = "main"; | ||
| 126 | stage_ci.pSpecializationInfo = nullptr; | ||
| 127 | |||
| 128 | VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; | ||
| 129 | subgroup_size_ci.sType = | ||
| 130 | VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; | ||
| 131 | subgroup_size_ci.pNext = nullptr; | ||
| 102 | subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; | 132 | subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; |
| 103 | if (entries.uses_warps && device.IsGuestWarpSizeSupported(vk::ShaderStageFlagBits::eCompute)) { | 133 | |
| 104 | shader_stage_ci.pNext = &subgroup_size_ci; | 134 | if (entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT)) { |
| 135 | stage_ci.pNext = &subgroup_size_ci; | ||
| 105 | } | 136 | } |
| 106 | 137 | ||
| 107 | const vk::ComputePipelineCreateInfo create_info({}, shader_stage_ci, *layout, {}, 0); | 138 | ci.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; |
| 108 | const auto dev = device.GetLogical(); | 139 | ci.pNext = nullptr; |
| 109 | return dev.createComputePipelineUnique({}, create_info, nullptr, device.GetDispatchLoader()); | 140 | ci.flags = 0; |
| 141 | ci.layout = *layout; | ||
| 142 | ci.basePipelineHandle = nullptr; | ||
| 143 | ci.basePipelineIndex = 0; | ||
| 144 | return device.GetLogical().CreateComputePipeline(ci); | ||
| 110 | } | 145 | } |
| 111 | 146 | ||
| 112 | } // namespace Vulkan | 147 | } // namespace Vulkan |
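This file swaps every Vulkan-Hpp vk::Unique* handle for the project's new vk:: wrapper types from wrapper.h. Below is a minimal sketch of the kind of move-only RAII handle such a wrapper presumably provides; the template, its parameters, and the statically bound destroy function are illustrative assumptions (the real wrapper also routes destruction through a dynamically loaded dispatch table), not the actual yuzu API.

    #include <utility>
    #include <vulkan/vulkan.h>

    // Illustrative move-only owner for a device-created Vulkan handle. T is the
    // handle type, DestroyFn the matching vkDestroy* entry point taking
    // (device, handle, allocator). Assumes the loader is linked statically.
    template <typename T, void (*DestroyFn)(VkDevice, T, const VkAllocationCallbacks*)>
    class Handle {
    public:
        Handle() = default;
        Handle(VkDevice device_, T handle_) : device{device_}, handle{handle_} {}
        ~Handle() {
            Release();
        }
        Handle(Handle&& rhs) noexcept
            : device{rhs.device}, handle{std::exchange(rhs.handle, T{})} {}
        Handle& operator=(Handle&& rhs) noexcept {
            Release();
            device = rhs.device;
            handle = std::exchange(rhs.handle, T{});
            return *this;
        }
        Handle(const Handle&) = delete;
        Handle& operator=(const Handle&) = delete;

        // Dereference mirrors vk::UniqueHandle, so call sites such as
        // *shader_module and *layout read the same before and after the migration.
        T operator*() const {
            return handle;
        }
        explicit operator bool() const {
            return handle != T{};
        }

    private:
        void Release() {
            if (handle != T{}) {
                DestroyFn(device, handle, nullptr);
            }
        }

        VkDevice device{};
        T handle{};
    };

    using ShaderModule = Handle<VkShaderModule, &vkDestroyShaderModule>;
    using PipelineLayout = Handle<VkPipelineLayout, &vkDestroyPipelineLayout>;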
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index 22235c6c9..33b9af29e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -7,9 +7,9 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 12 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 11 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| @@ -25,42 +25,42 @@ public: | |||
| 25 | const SPIRVShader& shader); | 25 | const SPIRVShader& shader); |
| 26 | ~VKComputePipeline(); | 26 | ~VKComputePipeline(); |
| 27 | 27 | ||
| 28 | vk::DescriptorSet CommitDescriptorSet(); | 28 | VkDescriptorSet CommitDescriptorSet(); |
| 29 | 29 | ||
| 30 | vk::Pipeline GetHandle() const { | 30 | VkPipeline GetHandle() const { |
| 31 | return *pipeline; | 31 | return *pipeline; |
| 32 | } | 32 | } |
| 33 | 33 | ||
| 34 | vk::PipelineLayout GetLayout() const { | 34 | VkPipelineLayout GetLayout() const { |
| 35 | return *layout; | 35 | return *layout; |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | const ShaderEntries& GetEntries() { | 38 | const ShaderEntries& GetEntries() const { |
| 39 | return entries; | 39 | return entries; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | private: | 42 | private: |
| 43 | UniqueDescriptorSetLayout CreateDescriptorSetLayout() const; | 43 | vk::DescriptorSetLayout CreateDescriptorSetLayout() const; |
| 44 | 44 | ||
| 45 | UniquePipelineLayout CreatePipelineLayout() const; | 45 | vk::PipelineLayout CreatePipelineLayout() const; |
| 46 | 46 | ||
| 47 | UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate() const; | 47 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const; |
| 48 | 48 | ||
| 49 | UniqueShaderModule CreateShaderModule(const std::vector<u32>& code) const; | 49 | vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const; |
| 50 | 50 | ||
| 51 | UniquePipeline CreatePipeline() const; | 51 | vk::Pipeline CreatePipeline() const; |
| 52 | 52 | ||
| 53 | const VKDevice& device; | 53 | const VKDevice& device; |
| 54 | VKScheduler& scheduler; | 54 | VKScheduler& scheduler; |
| 55 | ShaderEntries entries; | 55 | ShaderEntries entries; |
| 56 | 56 | ||
| 57 | UniqueDescriptorSetLayout descriptor_set_layout; | 57 | vk::DescriptorSetLayout descriptor_set_layout; |
| 58 | DescriptorAllocator descriptor_allocator; | 58 | DescriptorAllocator descriptor_allocator; |
| 59 | VKUpdateDescriptorQueue& update_descriptor_queue; | 59 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 60 | UniquePipelineLayout layout; | 60 | vk::PipelineLayout layout; |
| 61 | UniqueDescriptorUpdateTemplate descriptor_template; | 61 | vk::DescriptorUpdateTemplateKHR descriptor_template; |
| 62 | UniqueShaderModule shader_module; | 62 | vk::ShaderModule shader_module; |
| 63 | UniquePipeline pipeline; | 63 | vk::Pipeline pipeline; |
| 64 | }; | 64 | }; |
| 65 | 65 | ||
| 66 | } // namespace Vulkan | 66 | } // namespace Vulkan |
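As a hedged usage sketch, this is roughly how the accessors above are consumed when a dispatch is recorded. yuzu actually records through VKScheduler and its own command-buffer wrapper; raw Vulkan calls are used here only to keep the example self-contained, and set index 0 is assumed from the single-set layout built by CreatePipelineLayout. RecordDispatch is a hypothetical helper, not a function in the codebase.

    #include <vulkan/vulkan.h>

    void RecordDispatch(VkCommandBuffer cmdbuf, VKComputePipeline& pipeline, u32 groups_x,
                        u32 groups_y, u32 groups_z) {
        const VkDescriptorSet set = pipeline.CommitDescriptorSet();
        vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.GetHandle());
        if (set != VK_NULL_HANDLE) {
            // Shaders that use no descriptors return a null set; skip the bind.
            vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.GetLayout(),
                                    0, 1, &set, 0, nullptr);
        }
        vkCmdDispatch(cmdbuf, groups_x, groups_y, groups_z);
    }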
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index cc7c281a0..e9d528aa6 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | |||
| @@ -6,10 +6,10 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | 7 | ||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 9 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | 10 | #include "video_core/renderer_vulkan/vk_device.h" |
| 12 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 11 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| @@ -17,19 +17,18 @@ namespace Vulkan { | |||
| 17 | constexpr std::size_t SETS_GROW_RATE = 0x20; | 17 | constexpr std::size_t SETS_GROW_RATE = 0x20; |
| 18 | 18 | ||
| 19 | DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, | 19 | DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, |
| 20 | vk::DescriptorSetLayout layout) | 20 | VkDescriptorSetLayout layout) |
| 21 | : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} | 21 | : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} |
| 22 | 22 | ||
| 23 | DescriptorAllocator::~DescriptorAllocator() = default; | 23 | DescriptorAllocator::~DescriptorAllocator() = default; |
| 24 | 24 | ||
| 25 | vk::DescriptorSet DescriptorAllocator::Commit(VKFence& fence) { | 25 | VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) { |
| 26 | return *descriptors[CommitResource(fence)]; | 26 | const std::size_t index = CommitResource(fence); |
| 27 | return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; | ||
| 27 | } | 28 | } |
| 28 | 29 | ||
| 29 | void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { | 30 | void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { |
| 30 | auto new_sets = descriptor_pool.AllocateDescriptors(layout, end - begin); | 31 | descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); |
| 31 | descriptors.insert(descriptors.end(), std::make_move_iterator(new_sets.begin()), | ||
| 32 | std::make_move_iterator(new_sets.end())); | ||
| 33 | } | 32 | } |
| 34 | 33 | ||
| 35 | VKDescriptorPool::VKDescriptorPool(const VKDevice& device) | 34 | VKDescriptorPool::VKDescriptorPool(const VKDevice& device) |
| @@ -37,53 +36,50 @@ VKDescriptorPool::VKDescriptorPool(const VKDevice& device) | |||
| 37 | 36 | ||
| 38 | VKDescriptorPool::~VKDescriptorPool() = default; | 37 | VKDescriptorPool::~VKDescriptorPool() = default; |
| 39 | 38 | ||
| 40 | vk::DescriptorPool VKDescriptorPool::AllocateNewPool() { | 39 | vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { |
| 41 | static constexpr u32 num_sets = 0x20000; | 40 | static constexpr u32 num_sets = 0x20000; |
| 42 | static constexpr vk::DescriptorPoolSize pool_sizes[] = { | 41 | static constexpr VkDescriptorPoolSize pool_sizes[] = { |
| 43 | {vk::DescriptorType::eUniformBuffer, num_sets * 90}, | 42 | {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, num_sets * 90}, |
| 44 | {vk::DescriptorType::eStorageBuffer, num_sets * 60}, | 43 | {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, |
| 45 | {vk::DescriptorType::eUniformTexelBuffer, num_sets * 64}, | 44 | {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, |
| 46 | {vk::DescriptorType::eCombinedImageSampler, num_sets * 64}, | 45 | {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, |
| 47 | {vk::DescriptorType::eStorageImage, num_sets * 40}}; | 46 | {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; |
| 48 | 47 | ||
| 49 | const vk::DescriptorPoolCreateInfo create_info( | 48 | VkDescriptorPoolCreateInfo ci; |
| 50 | vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, num_sets, | 49 | ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; |
| 51 | static_cast<u32>(std::size(pool_sizes)), std::data(pool_sizes)); | 50 | ci.pNext = nullptr; |
| 52 | const auto dev = device.GetLogical(); | 51 | ci.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; |
| 53 | return *pools.emplace_back( | 52 | ci.maxSets = num_sets; |
| 54 | dev.createDescriptorPoolUnique(create_info, nullptr, device.GetDispatchLoader())); | 53 | ci.poolSizeCount = static_cast<u32>(std::size(pool_sizes)); |
| 54 | ci.pPoolSizes = std::data(pool_sizes); | ||
| 55 | return &pools.emplace_back(device.GetLogical().CreateDescriptorPool(ci)); | ||
| 55 | } | 56 | } |
| 56 | 57 | ||
| 57 | std::vector<UniqueDescriptorSet> VKDescriptorPool::AllocateDescriptors( | 58 | vk::DescriptorSets VKDescriptorPool::AllocateDescriptors(VkDescriptorSetLayout layout, |
| 58 | vk::DescriptorSetLayout layout, std::size_t count) { | 59 | std::size_t count) { |
| 59 | std::vector layout_copies(count, layout); | 60 | const std::vector layout_copies(count, layout); |
| 60 | vk::DescriptorSetAllocateInfo allocate_info(active_pool, static_cast<u32>(count), | 61 | VkDescriptorSetAllocateInfo ai; |
| 61 | layout_copies.data()); | 62 | ai.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; |
| 62 | 63 | ai.pNext = nullptr; | |
| 63 | std::vector<vk::DescriptorSet> sets(count); | 64 | ai.descriptorPool = **active_pool; |
| 64 | const auto dev = device.GetLogical(); | 65 | ai.descriptorSetCount = static_cast<u32>(count); |
| 65 | const auto& dld = device.GetDispatchLoader(); | 66 | ai.pSetLayouts = layout_copies.data(); |
| 66 | switch (const auto result = dev.allocateDescriptorSets(&allocate_info, sets.data(), dld)) { | 67 | |
| 67 | case vk::Result::eSuccess: | 68 | vk::DescriptorSets sets = active_pool->Allocate(ai); |
| 68 | break; | 69 | if (!sets.IsOutOfPoolMemory()) { |
| 69 | case vk::Result::eErrorOutOfPoolMemory: | 70 | return sets; |
| 70 | active_pool = AllocateNewPool(); | ||
| 71 | allocate_info.descriptorPool = active_pool; | ||
| 72 | if (dev.allocateDescriptorSets(&allocate_info, sets.data(), dld) == vk::Result::eSuccess) { | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | [[fallthrough]]; | ||
| 76 | default: | ||
| 77 | vk::throwResultException(result, "vk::Device::allocateDescriptorSetsUnique"); | ||
| 78 | } | 71 | } |
| 79 | 72 | ||
| 80 | vk::PoolFree deleter(dev, active_pool, dld); | 73 | // Our current pool is out of memory. Allocate a new one and retry |
| 81 | std::vector<UniqueDescriptorSet> unique_sets; | 74 | active_pool = AllocateNewPool(); |
| 82 | unique_sets.reserve(count); | 75 | ai.descriptorPool = **active_pool; |
| 83 | for (const auto set : sets) { | 76 | sets = active_pool->Allocate(ai); |
| 84 | unique_sets.push_back(UniqueDescriptorSet{set, deleter}); | 77 | if (!sets.IsOutOfPoolMemory()) { |
| 78 | return sets; | ||
| 85 | } | 79 | } |
| 86 | return unique_sets; | 80 | |
| 81 | // After allocating a new pool, we are out of memory again. We can't handle this from here. | ||
| 82 | throw vk::Exception(VK_ERROR_OUT_OF_POOL_MEMORY); | ||
| 87 | } | 83 | } |
| 88 | 84 | ||
| 89 | } // namespace Vulkan | 85 | } // namespace Vulkan |
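The allocation strategy above is: try the active pool, on VK_ERROR_OUT_OF_POOL_MEMORY create a fresh pool and retry exactly once, and treat a second failure as unrecoverable. A minimal, Vulkan-free model of that control flow (all names here are illustrative):

    #include <stdexcept>
    #include <vector>

    struct Pool {
        int remaining = 4; // stand-in for the pool's descriptor budget
        bool TryAllocate() { // models vkAllocateDescriptorSets
            if (remaining == 0) {
                return false; // models VK_ERROR_OUT_OF_POOL_MEMORY
            }
            --remaining;
            return true;
        }
    };

    class PoolManager {
    public:
        void Allocate() {
            if (active != nullptr && active->TryAllocate()) {
                return;
            }
            // Out of pool memory (or no pool yet): grow and retry once. 'active'
            // is refreshed on every growth, so vector reallocation is harmless.
            active = &pools.emplace_back();
            if (active->TryAllocate()) {
                return;
            }
            // A brand-new pool also failed; nothing sensible to do locally.
            throw std::runtime_error("out of pool memory");
        }

    private:
        std::vector<Pool> pools;
        Pool* active = nullptr;
    };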
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index a441dbc0f..ab40c70f0 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h | |||
| @@ -8,8 +8,8 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 11 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| @@ -17,21 +17,21 @@ class VKDescriptorPool; | |||
| 17 | 17 | ||
| 18 | class DescriptorAllocator final : public VKFencedPool { | 18 | class DescriptorAllocator final : public VKFencedPool { |
| 19 | public: | 19 | public: |
| 20 | explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, vk::DescriptorSetLayout layout); | 20 | explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); |
| 21 | ~DescriptorAllocator() override; | 21 | ~DescriptorAllocator() override; |
| 22 | 22 | ||
| 23 | DescriptorAllocator(const DescriptorAllocator&) = delete; | 23 | DescriptorAllocator(const DescriptorAllocator&) = delete; |
| 24 | 24 | ||
| 25 | vk::DescriptorSet Commit(VKFence& fence); | 25 | VkDescriptorSet Commit(VKFence& fence); |
| 26 | 26 | ||
| 27 | protected: | 27 | protected: |
| 28 | void Allocate(std::size_t begin, std::size_t end) override; | 28 | void Allocate(std::size_t begin, std::size_t end) override; |
| 29 | 29 | ||
| 30 | private: | 30 | private: |
| 31 | VKDescriptorPool& descriptor_pool; | 31 | VKDescriptorPool& descriptor_pool; |
| 32 | const vk::DescriptorSetLayout layout; | 32 | const VkDescriptorSetLayout layout; |
| 33 | 33 | ||
| 34 | std::vector<UniqueDescriptorSet> descriptors; | 34 | std::vector<vk::DescriptorSets> descriptors_allocations; |
| 35 | }; | 35 | }; |
| 36 | 36 | ||
| 37 | class VKDescriptorPool final { | 37 | class VKDescriptorPool final { |
| @@ -42,15 +42,14 @@ public: | |||
| 42 | ~VKDescriptorPool(); | 42 | ~VKDescriptorPool(); |
| 43 | 43 | ||
| 44 | private: | 44 | private: |
| 45 | vk::DescriptorPool AllocateNewPool(); | 45 | vk::DescriptorPool* AllocateNewPool(); |
| 46 | 46 | ||
| 47 | std::vector<UniqueDescriptorSet> AllocateDescriptors(vk::DescriptorSetLayout layout, | 47 | vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); |
| 48 | std::size_t count); | ||
| 49 | 48 | ||
| 50 | const VKDevice& device; | 49 | const VKDevice& device; |
| 51 | 50 | ||
| 52 | std::vector<UniqueDescriptorPool> pools; | 51 | std::vector<vk::DescriptorPool> pools; |
| 53 | vk::DescriptorPool active_pool; | 52 | vk::DescriptorPool* active_pool; |
| 54 | }; | 53 | }; |
| 55 | 54 | ||
| 56 | } // namespace Vulkan \ No newline at end of file | 55 | } // namespace Vulkan \ No newline at end of file |
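Commit() in the matching .cpp maps the flat index returned by CommitResource onto the two-level descriptors_allocations storage declared above. This only works because VKFencedPool grows in steps of exactly SETS_GROW_RATE, so every inner allocation holds that many sets. A small sketch of the invariant, with FakeSet standing in for VkDescriptorSet:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    constexpr std::size_t SETS_GROW_RATE = 0x20;
    using FakeSet = int; // stands in for VkDescriptorSet

    // Mirrors DescriptorAllocator::Commit: chunk index, then offset in the chunk.
    FakeSet Lookup(const std::vector<std::vector<FakeSet>>& allocations, std::size_t index) {
        return allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
    }

    int main() {
        std::vector<std::vector<FakeSet>> allocations;
        for (int chunk = 0; chunk < 3; ++chunk) {
            // Each Allocate(begin, end) call appends one SETS_GROW_RATE-sized chunk.
            allocations.emplace_back(SETS_GROW_RATE, chunk);
        }
        assert(Lookup(allocations, 0x21) == 1); // second chunk, offset 1
    }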
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 28d2fbc4f..52d29e49d 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -6,14 +6,15 @@ | |||
| 6 | #include <chrono> | 6 | #include <chrono> |
| 7 | #include <cstdlib> | 7 | #include <cstdlib> |
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <set> | ||
| 10 | #include <string_view> | 9 | #include <string_view> |
| 11 | #include <thread> | 10 | #include <thread> |
| 11 | #include <unordered_set> | ||
| 12 | #include <vector> | 12 | #include <vector> |
| 13 | |||
| 13 | #include "common/assert.h" | 14 | #include "common/assert.h" |
| 14 | #include "core/settings.h" | 15 | #include "core/settings.h" |
| 15 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_device.h" | 16 | #include "video_core/renderer_vulkan/vk_device.h" |
| 17 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 17 | 18 | ||
| 18 | namespace Vulkan { | 19 | namespace Vulkan { |
| 19 | 20 | ||
| @@ -21,49 +22,43 @@ namespace { | |||
| 21 | 22 | ||
| 22 | namespace Alternatives { | 23 | namespace Alternatives { |
| 23 | 24 | ||
| 24 | constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint, | 25 | constexpr std::array Depth24UnormS8_UINT = {VK_FORMAT_D32_SFLOAT_S8_UINT, |
| 25 | vk::Format::eD16UnormS8Uint, vk::Format{}}; | 26 | VK_FORMAT_D16_UNORM_S8_UINT, VkFormat{}}; |
| 26 | constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint, | 27 | constexpr std::array Depth16UnormS8_UINT = {VK_FORMAT_D24_UNORM_S8_UINT, |
| 27 | vk::Format::eD32SfloatS8Uint, vk::Format{}}; | 28 | VK_FORMAT_D32_SFLOAT_S8_UINT, VkFormat{}}; |
| 28 | 29 | ||
| 29 | } // namespace Alternatives | 30 | } // namespace Alternatives |
| 30 | 31 | ||
| 32 | constexpr std::array REQUIRED_EXTENSIONS = { | ||
| 33 | VK_KHR_SWAPCHAIN_EXTENSION_NAME, | ||
| 34 | VK_KHR_16BIT_STORAGE_EXTENSION_NAME, | ||
| 35 | VK_KHR_8BIT_STORAGE_EXTENSION_NAME, | ||
| 36 | VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, | ||
| 37 | VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, | ||
| 38 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | ||
| 39 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | ||
| 40 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | ||
| 41 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, | ||
| 42 | }; | ||
| 43 | |||
| 31 | template <typename T> | 44 | template <typename T> |
| 32 | void SetNext(void**& next, T& data) { | 45 | void SetNext(void**& next, T& data) { |
| 33 | *next = &data; | 46 | *next = &data; |
| 34 | next = &data.pNext; | 47 | next = &data.pNext; |
| 35 | } | 48 | } |
| 36 | 49 | ||
| 37 | template <typename T> | 50 | constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { |
| 38 | T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { | ||
| 39 | vk::PhysicalDeviceFeatures2 features; | ||
| 40 | T extension_features; | ||
| 41 | features.pNext = &extension_features; | ||
| 42 | physical.getFeatures2(&features, dldi); | ||
| 43 | return extension_features; | ||
| 44 | } | ||
| 45 | |||
| 46 | template <typename T> | ||
| 47 | T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { | ||
| 48 | vk::PhysicalDeviceProperties2 properties; | ||
| 49 | T extension_properties; | ||
| 50 | properties.pNext = &extension_properties; | ||
| 51 | physical.getProperties2(&properties, dldi); | ||
| 52 | return extension_properties; | ||
| 53 | } | ||
| 54 | |||
| 55 | constexpr const vk::Format* GetFormatAlternatives(vk::Format format) { | ||
| 56 | switch (format) { | 51 | switch (format) { |
| 57 | case vk::Format::eD24UnormS8Uint: | 52 | case VK_FORMAT_D24_UNORM_S8_UINT: |
| 58 | return Alternatives::Depth24UnormS8Uint.data(); | 53 | return Alternatives::Depth24UnormS8_UINT.data(); |
| 59 | case vk::Format::eD16UnormS8Uint: | 54 | case VK_FORMAT_D16_UNORM_S8_UINT: |
| 60 | return Alternatives::Depth16UnormS8Uint.data(); | 55 | return Alternatives::Depth16UnormS8_UINT.data(); |
| 61 | default: | 56 | default: |
| 62 | return nullptr; | 57 | return nullptr; |
| 63 | } | 58 | } |
| 64 | } | 59 | } |
| 65 | 60 | ||
| 66 | vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, FormatType format_type) { | 61 | VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType format_type) { |
| 67 | switch (format_type) { | 62 | switch (format_type) { |
| 68 | case FormatType::Linear: | 63 | case FormatType::Linear: |
| 69 | return properties.linearTilingFeatures; | 64 | return properties.linearTilingFeatures; |
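The SetNext helper kept in this hunk threads extension structs onto a pNext chain: it stores the struct's address at the current tail, then makes that struct's own pNext field the new tail. A self-contained illustration with dummy structs:

    #include <cassert>

    struct FeatureA {
        int sType = 1;
        void* pNext = nullptr;
    };
    struct FeatureB {
        int sType = 2;
        void* pNext = nullptr;
    };

    template <typename T>
    void SetNext(void**& next, T& data) {
        *next = &data;      // link the new struct into the chain
        next = &data.pNext; // the chain's new tail is this struct's pNext
    }

    int main() {
        void* head = nullptr;
        void** next = &head;
        FeatureA a;
        FeatureB b;
        SetNext(next, a);
        SetNext(next, b);
        assert(head == &a && a.pNext == &b && b.pNext == nullptr);
    }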
| @@ -76,79 +71,220 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format | |||
| 76 | } | 71 | } |
| 77 | } | 72 | } |
| 78 | 73 | ||
| 74 | std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | ||
| 75 | vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { | ||
| 76 | static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, | ||
| 77 | VK_FORMAT_A8B8G8R8_UINT_PACK32, | ||
| 78 | VK_FORMAT_A8B8G8R8_SNORM_PACK32, | ||
| 79 | VK_FORMAT_A8B8G8R8_SRGB_PACK32, | ||
| 80 | VK_FORMAT_B5G6R5_UNORM_PACK16, | ||
| 81 | VK_FORMAT_A2B10G10R10_UNORM_PACK32, | ||
| 82 | VK_FORMAT_A1R5G5B5_UNORM_PACK16, | ||
| 83 | VK_FORMAT_R32G32B32A32_SFLOAT, | ||
| 84 | VK_FORMAT_R32G32B32A32_UINT, | ||
| 85 | VK_FORMAT_R32G32_SFLOAT, | ||
| 86 | VK_FORMAT_R32G32_UINT, | ||
| 87 | VK_FORMAT_R16G16B16A16_UINT, | ||
| 88 | VK_FORMAT_R16G16B16A16_SNORM, | ||
| 89 | VK_FORMAT_R16G16B16A16_UNORM, | ||
| 90 | VK_FORMAT_R16G16_UNORM, | ||
| 91 | VK_FORMAT_R16G16_SNORM, | ||
| 92 | VK_FORMAT_R16G16_SFLOAT, | ||
| 93 | VK_FORMAT_R16_UNORM, | ||
| 94 | VK_FORMAT_R8G8B8A8_SRGB, | ||
| 95 | VK_FORMAT_R8G8_UNORM, | ||
| 96 | VK_FORMAT_R8G8_SNORM, | ||
| 97 | VK_FORMAT_R8_UNORM, | ||
| 98 | VK_FORMAT_R8_UINT, | ||
| 99 | VK_FORMAT_B10G11R11_UFLOAT_PACK32, | ||
| 100 | VK_FORMAT_R32_SFLOAT, | ||
| 101 | VK_FORMAT_R32_UINT, | ||
| 102 | VK_FORMAT_R32_SINT, | ||
| 103 | VK_FORMAT_R16_SFLOAT, | ||
| 104 | VK_FORMAT_R16G16B16A16_SFLOAT, | ||
| 105 | VK_FORMAT_B8G8R8A8_UNORM, | ||
| 106 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, | ||
| 107 | VK_FORMAT_D32_SFLOAT, | ||
| 108 | VK_FORMAT_D16_UNORM, | ||
| 109 | VK_FORMAT_D16_UNORM_S8_UINT, | ||
| 110 | VK_FORMAT_D24_UNORM_S8_UINT, | ||
| 111 | VK_FORMAT_D32_SFLOAT_S8_UINT, | ||
| 112 | VK_FORMAT_BC1_RGBA_UNORM_BLOCK, | ||
| 113 | VK_FORMAT_BC2_UNORM_BLOCK, | ||
| 114 | VK_FORMAT_BC3_UNORM_BLOCK, | ||
| 115 | VK_FORMAT_BC4_UNORM_BLOCK, | ||
| 116 | VK_FORMAT_BC5_UNORM_BLOCK, | ||
| 117 | VK_FORMAT_BC5_SNORM_BLOCK, | ||
| 118 | VK_FORMAT_BC7_UNORM_BLOCK, | ||
| 119 | VK_FORMAT_BC6H_UFLOAT_BLOCK, | ||
| 120 | VK_FORMAT_BC6H_SFLOAT_BLOCK, | ||
| 121 | VK_FORMAT_BC1_RGBA_SRGB_BLOCK, | ||
| 122 | VK_FORMAT_BC2_SRGB_BLOCK, | ||
| 123 | VK_FORMAT_BC3_SRGB_BLOCK, | ||
| 124 | VK_FORMAT_BC7_SRGB_BLOCK, | ||
| 125 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, | ||
| 126 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, | ||
| 127 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, | ||
| 128 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, | ||
| 129 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, | ||
| 130 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, | ||
| 131 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, | ||
| 132 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | ||
| 133 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, | ||
| 134 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, | ||
| 135 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, | ||
| 136 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, | ||
| 137 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, | ||
| 138 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, | ||
| 139 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, | ||
| 140 | VK_FORMAT_ASTC_8x6_SRGB_BLOCK, | ||
| 141 | VK_FORMAT_ASTC_6x5_UNORM_BLOCK, | ||
| 142 | VK_FORMAT_ASTC_6x5_SRGB_BLOCK, | ||
| 143 | VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; | ||
| 144 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; | ||
| 145 | for (const auto format : formats) { | ||
| 146 | format_properties.emplace(format, physical.GetFormatProperties(format)); | ||
| 147 | } | ||
| 148 | return format_properties; | ||
| 149 | } | ||
| 150 | |||
| 79 | } // Anonymous namespace | 151 | } // Anonymous namespace |
| 80 | 152 | ||
| 81 | VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | 153 | VKDevice::VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, |
| 82 | vk::SurfaceKHR surface) | 154 | const vk::InstanceDispatch& dld) |
| 83 | : physical{physical}, properties{physical.getProperties(dldi)}, | 155 | : dld{dld}, physical{physical}, properties{physical.GetProperties()}, |
| 84 | format_properties{GetFormatProperties(dldi, physical)} { | 156 | format_properties{GetFormatProperties(physical, dld)} { |
| 85 | SetupFamilies(dldi, surface); | 157 | SetupFamilies(surface); |
| 86 | SetupFeatures(dldi); | 158 | SetupFeatures(); |
| 87 | } | 159 | } |
| 88 | 160 | ||
| 89 | VKDevice::~VKDevice() = default; | 161 | VKDevice::~VKDevice() = default; |
| 90 | 162 | ||
| 91 | bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { | 163 | bool VKDevice::Create() { |
| 92 | const auto queue_cis = GetDeviceQueueCreateInfos(); | 164 | const auto queue_cis = GetDeviceQueueCreateInfos(); |
| 93 | const std::vector extensions = LoadExtensions(dldi); | 165 | const std::vector extensions = LoadExtensions(); |
| 94 | 166 | ||
| 95 | vk::PhysicalDeviceFeatures2 features2; | 167 | VkPhysicalDeviceFeatures2 features2; |
| 168 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; | ||
| 169 | features2.pNext = nullptr; | ||
| 96 | void** next = &features2.pNext; | 170 | void** next = &features2.pNext; |
| 171 | |||
| 97 | auto& features = features2.features; | 172 | auto& features = features2.features; |
| 98 | features.vertexPipelineStoresAndAtomics = true; | 173 | features.robustBufferAccess = false; |
| 174 | features.fullDrawIndexUint32 = false; | ||
| 175 | features.imageCubeArray = false; | ||
| 99 | features.independentBlend = true; | 176 | features.independentBlend = true; |
| 177 | features.geometryShader = true; | ||
| 178 | features.tessellationShader = true; | ||
| 179 | features.sampleRateShading = false; | ||
| 180 | features.dualSrcBlend = false; | ||
| 181 | features.logicOp = false; | ||
| 182 | features.multiDrawIndirect = false; | ||
| 183 | features.drawIndirectFirstInstance = false; | ||
| 100 | features.depthClamp = true; | 184 | features.depthClamp = true; |
| 101 | features.samplerAnisotropy = true; | 185 | features.depthBiasClamp = true; |
| 186 | features.fillModeNonSolid = false; | ||
| 187 | features.depthBounds = false; | ||
| 188 | features.wideLines = false; | ||
| 102 | features.largePoints = true; | 189 | features.largePoints = true; |
| 190 | features.alphaToOne = false; | ||
| 103 | features.multiViewport = true; | 191 | features.multiViewport = true; |
| 104 | features.depthBiasClamp = true; | 192 | features.samplerAnisotropy = true; |
| 105 | features.geometryShader = true; | 193 | features.textureCompressionETC2 = false; |
| 106 | features.tessellationShader = true; | 194 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; |
| 195 | features.textureCompressionBC = false; | ||
| 107 | features.occlusionQueryPrecise = true; | 196 | features.occlusionQueryPrecise = true; |
| 197 | features.pipelineStatisticsQuery = false; | ||
| 198 | features.vertexPipelineStoresAndAtomics = true; | ||
| 108 | features.fragmentStoresAndAtomics = true; | 199 | features.fragmentStoresAndAtomics = true; |
| 200 | features.shaderTessellationAndGeometryPointSize = false; | ||
| 109 | features.shaderImageGatherExtended = true; | 201 | features.shaderImageGatherExtended = true; |
| 202 | features.shaderStorageImageExtendedFormats = false; | ||
| 203 | features.shaderStorageImageMultisample = false; | ||
| 110 | features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; | 204 | features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported; |
| 111 | features.shaderStorageImageWriteWithoutFormat = true; | 205 | features.shaderStorageImageWriteWithoutFormat = true; |
| 112 | features.textureCompressionASTC_LDR = is_optimal_astc_supported; | 206 | features.shaderUniformBufferArrayDynamicIndexing = false; |
| 113 | 207 | features.shaderSampledImageArrayDynamicIndexing = false; | |
| 114 | vk::PhysicalDevice16BitStorageFeaturesKHR bit16_storage; | 208 | features.shaderStorageBufferArrayDynamicIndexing = false; |
| 209 | features.shaderStorageImageArrayDynamicIndexing = false; | ||
| 210 | features.shaderClipDistance = false; | ||
| 211 | features.shaderCullDistance = false; | ||
| 212 | features.shaderFloat64 = false; | ||
| 213 | features.shaderInt64 = false; | ||
| 214 | features.shaderInt16 = false; | ||
| 215 | features.shaderResourceResidency = false; | ||
| 216 | features.shaderResourceMinLod = false; | ||
| 217 | features.sparseBinding = false; | ||
| 218 | features.sparseResidencyBuffer = false; | ||
| 219 | features.sparseResidencyImage2D = false; | ||
| 220 | features.sparseResidencyImage3D = false; | ||
| 221 | features.sparseResidency2Samples = false; | ||
| 222 | features.sparseResidency4Samples = false; | ||
| 223 | features.sparseResidency8Samples = false; | ||
| 224 | features.sparseResidency16Samples = false; | ||
| 225 | features.sparseResidencyAliased = false; | ||
| 226 | features.variableMultisampleRate = false; | ||
| 227 | features.inheritedQueries = false; | ||
| 228 | |||
| 229 | VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage; | ||
| 230 | bit16_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR; | ||
| 231 | bit16_storage.pNext = nullptr; | ||
| 232 | bit16_storage.storageBuffer16BitAccess = false; | ||
| 115 | bit16_storage.uniformAndStorageBuffer16BitAccess = true; | 233 | bit16_storage.uniformAndStorageBuffer16BitAccess = true; |
| 234 | bit16_storage.storagePushConstant16 = false; | ||
| 235 | bit16_storage.storageInputOutput16 = false; | ||
| 116 | SetNext(next, bit16_storage); | 236 | SetNext(next, bit16_storage); |
| 117 | 237 | ||
| 118 | vk::PhysicalDevice8BitStorageFeaturesKHR bit8_storage; | 238 | VkPhysicalDevice8BitStorageFeaturesKHR bit8_storage; |
| 239 | bit8_storage.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR; | ||
| 240 | bit8_storage.pNext = nullptr; | ||
| 241 | bit8_storage.storageBuffer8BitAccess = false; | ||
| 119 | bit8_storage.uniformAndStorageBuffer8BitAccess = true; | 242 | bit8_storage.uniformAndStorageBuffer8BitAccess = true; |
| 243 | bit8_storage.storagePushConstant8 = false; | ||
| 120 | SetNext(next, bit8_storage); | 244 | SetNext(next, bit8_storage); |
| 121 | 245 | ||
| 122 | vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; | 246 | VkPhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; |
| 247 | host_query_reset.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT; | ||
| 123 | host_query_reset.hostQueryReset = true; | 248 | host_query_reset.hostQueryReset = true; |
| 124 | SetNext(next, host_query_reset); | 249 | SetNext(next, host_query_reset); |
| 125 | 250 | ||
| 126 | vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; | 251 | VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; |
| 127 | if (is_float16_supported) { | 252 | if (is_float16_supported) { |
| 253 | float16_int8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; | ||
| 254 | float16_int8.pNext = nullptr; | ||
| 128 | float16_int8.shaderFloat16 = true; | 255 | float16_int8.shaderFloat16 = true; |
| 256 | float16_int8.shaderInt8 = false; | ||
| 129 | SetNext(next, float16_int8); | 257 | SetNext(next, float16_int8); |
| 130 | } else { | 258 | } else { |
| 131 | LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); | 259 | LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); |
| 132 | } | 260 | } |
| 133 | 261 | ||
| 134 | vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; | 262 | VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; |
| 135 | if (khr_uniform_buffer_standard_layout) { | 263 | if (khr_uniform_buffer_standard_layout) { |
| 264 | std430_layout.sType = | ||
| 265 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR; | ||
| 266 | std430_layout.pNext = nullptr; | ||
| 136 | std430_layout.uniformBufferStandardLayout = true; | 267 | std430_layout.uniformBufferStandardLayout = true; |
| 137 | SetNext(next, std430_layout); | 268 | SetNext(next, std430_layout); |
| 138 | } else { | 269 | } else { |
| 139 | LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); | 270 | LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); |
| 140 | } | 271 | } |
| 141 | 272 | ||
| 142 | vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; | 273 | VkPhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; |
| 143 | if (ext_index_type_uint8) { | 274 | if (ext_index_type_uint8) { |
| 275 | index_type_uint8.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT; | ||
| 276 | index_type_uint8.pNext = nullptr; | ||
| 144 | index_type_uint8.indexTypeUint8 = true; | 277 | index_type_uint8.indexTypeUint8 = true; |
| 145 | SetNext(next, index_type_uint8); | 278 | SetNext(next, index_type_uint8); |
| 146 | } else { | 279 | } else { |
| 147 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); | 280 | LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); |
| 148 | } | 281 | } |
| 149 | 282 | ||
| 150 | vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; | 283 | VkPhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback; |
| 151 | if (ext_transform_feedback) { | 284 | if (ext_transform_feedback) { |
| 285 | transform_feedback.sType = | ||
| 286 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; | ||
| 287 | transform_feedback.pNext = nullptr; | ||
| 152 | transform_feedback.transformFeedback = true; | 288 | transform_feedback.transformFeedback = true; |
| 153 | transform_feedback.geometryStreams = true; | 289 | transform_feedback.geometryStreams = true; |
| 154 | SetNext(next, transform_feedback); | 290 | SetNext(next, transform_feedback); |
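A note on why Create() now writes out every member: the raw VkPhysicalDeviceFeatures2 struct has no constructors, so an uninitialized stack instance contains garbage the driver would happily read. Spelling out each field documents intent explicitly; value-initialization is the terser alternative when almost everything should stay false, sketched here as an option rather than what the code above chooses:

    VkPhysicalDeviceFeatures2 features2{}; // zero-initializes every member
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.features.independentBlend = VK_TRUE;  // then enable only what
    features2.features.samplerAnisotropy = VK_TRUE; // is actually required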
| @@ -160,62 +296,48 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||
| 160 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 296 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 161 | } | 297 | } |
| 162 | 298 | ||
| 163 | vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, | 299 | logical = vk::Device::Create(physical, queue_cis, extensions, features2, dld); |
| 164 | nullptr, static_cast<u32>(extensions.size()), extensions.data(), | 300 | if (!logical) { |
| 165 | nullptr); | 301 | LOG_ERROR(Render_Vulkan, "Failed to create logical device"); |
| 166 | device_ci.pNext = &features2; | ||
| 167 | |||
| 168 | vk::Device dummy_logical; | ||
| 169 | if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { | ||
| 170 | LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); | ||
| 171 | return false; | 302 | return false; |
| 172 | } | 303 | } |
| 173 | 304 | ||
| 174 | dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); | ||
| 175 | logical = UniqueDevice( | ||
| 176 | dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); | ||
| 177 | |||
| 178 | CollectTelemetryParameters(); | 305 | CollectTelemetryParameters(); |
| 179 | 306 | ||
| 180 | graphics_queue = logical->getQueue(graphics_family, 0, dld); | 307 | graphics_queue = logical.GetQueue(graphics_family); |
| 181 | present_queue = logical->getQueue(present_family, 0, dld); | 308 | present_queue = logical.GetQueue(present_family); |
| 182 | return true; | 309 | return true; |
| 183 | } | 310 | } |
| 184 | 311 | ||
| 185 | vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, | 312 | VkFormat VKDevice::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 186 | vk::FormatFeatureFlags wanted_usage, | 313 | FormatType format_type) const { |
| 187 | FormatType format_type) const { | ||
| 188 | if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { | 314 | if (IsFormatSupported(wanted_format, wanted_usage, format_type)) { |
| 189 | return wanted_format; | 315 | return wanted_format; |
| 190 | } | 316 | } |
| 191 | // The wanted format is not supported by hardware, search for alternatives | 317 | // The wanted format is not supported by hardware, search for alternatives |
| 192 | const vk::Format* alternatives = GetFormatAlternatives(wanted_format); | 318 | const VkFormat* alternatives = GetFormatAlternatives(wanted_format); |
| 193 | if (alternatives == nullptr) { | 319 | if (alternatives == nullptr) { |
| 194 | UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " | 320 | UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " |
| 195 | "hardware does not support it", | 321 | "hardware does not support it", |
| 196 | vk::to_string(wanted_format), vk::to_string(wanted_usage), | 322 | wanted_format, wanted_usage, format_type); |
| 197 | static_cast<u32>(format_type)); | ||
| 198 | return wanted_format; | 323 | return wanted_format; |
| 199 | } | 324 | } |
| 200 | 325 | ||
| 201 | std::size_t i = 0; | 326 | std::size_t i = 0; |
| 202 | for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; | 327 | for (VkFormat alternative = *alternatives; alternative; alternative = alternatives[++i]) { |
| 203 | alternative = alternatives[++i]) { | ||
| 204 | if (!IsFormatSupported(alternative, wanted_usage, format_type)) { | 328 | if (!IsFormatSupported(alternative, wanted_usage, format_type)) { |
| 205 | continue; | 329 | continue; |
| 206 | } | 330 | } |
| 207 | LOG_WARNING(Render_Vulkan, | 331 | LOG_WARNING(Render_Vulkan, |
| 208 | "Emulating format={} with alternative format={} with usage={} and type={}", | 332 | "Emulating format={} with alternative format={} with usage={} and type={}", |
| 209 | static_cast<u32>(wanted_format), static_cast<u32>(alternative), | 333 | wanted_format, alternative, wanted_usage, format_type); |
| 210 | static_cast<u32>(wanted_usage), static_cast<u32>(format_type)); | ||
| 211 | return alternative; | 334 | return alternative; |
| 212 | } | 335 | } |
| 213 | 336 | ||
| 214 | // No alternatives found, panic | 337 | // No alternatives found, panic |
| 215 | UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " | 338 | UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " |
| 216 | "doesn't support any of the alternatives", | 339 | "doesn't support any of the alternatives", |
| 217 | static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), | 340 | wanted_format, wanted_usage, format_type); |
| 218 | static_cast<u32>(format_type)); | ||
| 219 | return wanted_format; | 341 | return wanted_format; |
| 220 | } | 342 | } |
| 221 | 343 | ||
| @@ -229,35 +351,39 @@ void VKDevice::ReportLoss() const { | |||
| 229 | return; | 351 | return; |
| 230 | } | 352 | } |
| 231 | 353 | ||
| 232 | [[maybe_unused]] const std::vector data = graphics_queue.getCheckpointDataNV(dld); | 354 | [[maybe_unused]] const std::vector data = graphics_queue.GetCheckpointDataNV(dld); |
| 233 | // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be | 355 | // Catch here in debug builds (or with optimizations disabled) the last graphics pipeline to be |
| 234 | // executed. It can be done on a debugger by evaluating the expression: | 356 | // executed. It can be done on a debugger by evaluating the expression: |
| 235 | // *(VKGraphicsPipeline*)data[0] | 357 | // *(VKGraphicsPipeline*)data[0] |
| 236 | } | 358 | } |
| 237 | 359 | ||
| 238 | bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, | 360 | bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const { |
| 239 | const vk::DispatchLoaderDynamic& dldi) const { | ||
| 240 | // Disable for now to avoid converting ASTC twice. | 361 | // Disable for now to avoid converting ASTC twice. |
| 241 | return false; | ||
| 242 | static constexpr std::array astc_formats = { | 362 | static constexpr std::array astc_formats = { |
| 243 | vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock, | 363 | VK_FORMAT_ASTC_4x4_UNORM_BLOCK, VK_FORMAT_ASTC_4x4_SRGB_BLOCK, |
| 244 | vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock, | 364 | VK_FORMAT_ASTC_5x4_UNORM_BLOCK, VK_FORMAT_ASTC_5x4_SRGB_BLOCK, |
| 245 | vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, | 365 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, VK_FORMAT_ASTC_5x5_SRGB_BLOCK, |
| 246 | vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, | 366 | VK_FORMAT_ASTC_6x5_UNORM_BLOCK, VK_FORMAT_ASTC_6x5_SRGB_BLOCK, |
| 247 | vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, | 367 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, VK_FORMAT_ASTC_6x6_SRGB_BLOCK, |
| 248 | vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, | 368 | VK_FORMAT_ASTC_8x5_UNORM_BLOCK, VK_FORMAT_ASTC_8x5_SRGB_BLOCK, |
| 249 | vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock, | 369 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, VK_FORMAT_ASTC_8x6_SRGB_BLOCK, |
| 250 | vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock, | 370 | VK_FORMAT_ASTC_8x8_UNORM_BLOCK, VK_FORMAT_ASTC_8x8_SRGB_BLOCK, |
| 251 | vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock}; | 371 | VK_FORMAT_ASTC_10x5_UNORM_BLOCK, VK_FORMAT_ASTC_10x5_SRGB_BLOCK, |
| 372 | VK_FORMAT_ASTC_10x6_UNORM_BLOCK, VK_FORMAT_ASTC_10x6_SRGB_BLOCK, | ||
| 373 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | ||
| 374 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, VK_FORMAT_ASTC_10x10_SRGB_BLOCK, | ||
| 375 | VK_FORMAT_ASTC_12x10_UNORM_BLOCK, VK_FORMAT_ASTC_12x10_SRGB_BLOCK, | ||
| 376 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, VK_FORMAT_ASTC_12x12_SRGB_BLOCK, | ||
| 377 | }; | ||
| 252 | if (!features.textureCompressionASTC_LDR) { | 378 | if (!features.textureCompressionASTC_LDR) { |
| 253 | return false; | 379 | return false; |
| 254 | } | 380 | } |
| 255 | const auto format_feature_usage{ | 381 | const auto format_feature_usage{ |
| 256 | vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | | 382 | VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | |
| 257 | vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | | 383 | VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | |
| 258 | vk::FormatFeatureFlagBits::eTransferDst}; | 384 | VK_FORMAT_FEATURE_TRANSFER_DST_BIT}; |
| 259 | for (const auto format : astc_formats) { | 385 | for (const auto format : astc_formats) { |
| 260 | const auto format_properties{physical.getFormatProperties(format, dldi)}; | 386 | const auto format_properties{physical.GetFormatProperties(format)}; |
| 261 | if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { | 387 | if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { |
| 262 | return false; | 388 | return false; |
| 263 | } | 389 | } |
| @@ -265,62 +391,49 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features | |||
| 265 | return true; | 391 | return true; |
| 266 | } | 392 | } |
| 267 | 393 | ||
| 268 | bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | 394 | bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 269 | FormatType format_type) const { | 395 | FormatType format_type) const { |
| 270 | const auto it = format_properties.find(wanted_format); | 396 | const auto it = format_properties.find(wanted_format); |
| 271 | if (it == format_properties.end()) { | 397 | if (it == format_properties.end()) { |
| 272 | UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format)); | 398 | UNIMPLEMENTED_MSG("Unimplemented format query={}", wanted_format); |
| 273 | return true; | 399 | return true; |
| 274 | } | 400 | } |
| 275 | const auto supported_usage = GetFormatFeatures(it->second, format_type); | 401 | const auto supported_usage = GetFormatFeatures(it->second, format_type); |
| 276 | return (supported_usage & wanted_usage) == wanted_usage; | 402 | return (supported_usage & wanted_usage) == wanted_usage; |
| 277 | } | 403 | } |
| 278 | 404 | ||
| 279 | bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | 405 | bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) { |
| 280 | vk::SurfaceKHR surface) { | ||
| 281 | bool is_suitable = true; | 406 | bool is_suitable = true; |
| 407 | std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; | ||
| 282 | 408 | ||
| 283 | constexpr std::array required_extensions = { | 409 | for (const auto& prop : physical.EnumerateDeviceExtensionProperties()) { |
| 284 | VK_KHR_SWAPCHAIN_EXTENSION_NAME, | 410 | for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { |
| 285 | VK_KHR_16BIT_STORAGE_EXTENSION_NAME, | ||
| 286 | VK_KHR_8BIT_STORAGE_EXTENSION_NAME, | ||
| 287 | VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, | ||
| 288 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | ||
| 289 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | ||
| 290 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | ||
| 291 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, | ||
| 292 | }; | ||
| 293 | std::bitset<required_extensions.size()> available_extensions{}; | ||
| 294 | |||
| 295 | for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | ||
| 296 | for (std::size_t i = 0; i < required_extensions.size(); ++i) { | ||
| 297 | if (available_extensions[i]) { | 411 | if (available_extensions[i]) { |
| 298 | continue; | 412 | continue; |
| 299 | } | 413 | } |
| 300 | available_extensions[i] = | 414 | const std::string_view name{prop.extensionName}; |
| 301 | required_extensions[i] == std::string_view{prop.extensionName}; | 415 | available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; |
| 302 | } | 416 | } |
| 303 | } | 417 | } |
| 304 | if (!available_extensions.all()) { | 418 | if (!available_extensions.all()) { |
| 305 | for (std::size_t i = 0; i < required_extensions.size(); ++i) { | 419 | for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { |
| 306 | if (available_extensions[i]) { | 420 | if (available_extensions[i]) { |
| 307 | continue; | 421 | continue; |
| 308 | } | 422 | } |
| 309 | LOG_ERROR(Render_Vulkan, "Missing required extension: {}", required_extensions[i]); | 423 | LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); |
| 310 | is_suitable = false; | 424 | is_suitable = false; |
| 311 | } | 425 | } |
| 312 | } | 426 | } |
| 313 | 427 | ||
| 314 | bool has_graphics{}, has_present{}; | 428 | bool has_graphics{}, has_present{}; |
| 315 | const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); | 429 | const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); |
| 316 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { | 430 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { |
| 317 | const auto& family = queue_family_properties[i]; | 431 | const auto& family = queue_family_properties[i]; |
| 318 | if (family.queueCount == 0) { | 432 | if (family.queueCount == 0) { |
| 319 | continue; | 433 | continue; |
| 320 | } | 434 | } |
| 321 | has_graphics |= | 435 | has_graphics |= family.queueFlags & VK_QUEUE_GRAPHICS_BIT; |
| 322 | (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); | 436 | has_present |= physical.GetSurfaceSupportKHR(i, surface); |
| 323 | has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; | ||
| 324 | } | 437 | } |
| 325 | if (!has_graphics || !has_present) { | 438 | if (!has_graphics || !has_present) { |
| 326 | LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); | 439 | LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); |
| @@ -328,7 +441,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
| 328 | } | 441 | } |
| 329 | 442 | ||
| 330 | // TODO(Rodrigo): Check if the device matches all requirements. | 443 | // TODO(Rodrigo): Check if the device matches all requirements. |
| 331 | const auto properties{physical.getProperties(dldi)}; | 444 | const auto properties{physical.GetProperties()}; |
| 332 | const auto& limits{properties.limits}; | 445 | const auto& limits{properties.limits}; |
| 333 | 446 | ||
| 334 | constexpr u32 required_ubo_size = 65536; | 447 | constexpr u32 required_ubo_size = 65536; |
| @@ -345,7 +458,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
| 345 | is_suitable = false; | 458 | is_suitable = false; |
| 346 | } | 459 | } |
| 347 | 460 | ||
| 348 | const auto features{physical.getFeatures(dldi)}; | 461 | const auto features{physical.GetFeatures()}; |
| 349 | const std::array feature_report = { | 462 | const std::array feature_report = { |
| 350 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), | 463 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), |
| 351 | std::make_pair(features.independentBlend, "independentBlend"), | 464 | std::make_pair(features.independentBlend, "independentBlend"), |
| @@ -377,9 +490,9 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||
| 377 | return is_suitable; | 490 | return is_suitable; |
| 378 | } | 491 | } |
| 379 | 492 | ||
| 380 | std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { | 493 | std::vector<const char*> VKDevice::LoadExtensions() { |
| 381 | std::vector<const char*> extensions; | 494 | std::vector<const char*> extensions; |
| 382 | const auto Test = [&](const vk::ExtensionProperties& extension, | 495 | const auto Test = [&](const VkExtensionProperties& extension, |
| 383 | std::optional<std::reference_wrapper<bool>> status, const char* name, | 496 | std::optional<std::reference_wrapper<bool>> status, const char* name, |
| 384 | bool push) { | 497 | bool push) { |
| 385 | if (extension.extensionName != std::string_view(name)) { | 498 | if (extension.extensionName != std::string_view(name)) { |
| @@ -393,22 +506,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 393 | } | 506 | } |
| 394 | }; | 507 | }; |
| 395 | 508 | ||
| 396 | extensions.reserve(15); | 509 | extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); |
| 397 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | 510 | extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); |
| 398 | extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); | 511 | |
| 399 | extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); | ||
| 400 | extensions.push_back(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME); | ||
| 401 | extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); | ||
| 402 | extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); | ||
| 403 | extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); | ||
| 404 | extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); | ||
| 405 | |||
| 406 | [[maybe_unused]] const bool nsight = | ||
| 407 | std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | ||
| 408 | bool has_khr_shader_float16_int8{}; | 512 | bool has_khr_shader_float16_int8{}; |
| 409 | bool has_ext_subgroup_size_control{}; | 513 | bool has_ext_subgroup_size_control{}; |
| 410 | bool has_ext_transform_feedback{}; | 514 | bool has_ext_transform_feedback{}; |
| 411 | for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { | 515 | for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) { |
| 412 | Test(extension, khr_uniform_buffer_standard_layout, | 516 | Test(extension, khr_uniform_buffer_standard_layout, |
| 413 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | 517 | VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); |
| 414 | Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, | 518 | Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, |
| @@ -428,38 +532,67 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 428 | } | 532 | } |
| 429 | } | 533 | } |
| 430 | 534 | ||
| 535 | VkPhysicalDeviceFeatures2KHR features; | ||
| 536 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; | ||
| 537 | |||
| 538 | VkPhysicalDeviceProperties2KHR properties; | ||
| 539 | properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; | ||
| 540 | |||
| 431 | if (has_khr_shader_float16_int8) { | 541 | if (has_khr_shader_float16_int8) { |
| 432 | is_float16_supported = | 542 | VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8_features; |
| 433 | GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; | 543 | float16_int8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR; |
| 544 | float16_int8_features.pNext = nullptr; | ||
| 545 | features.pNext = &float16_int8_features; | ||
| 546 | |||
| 547 | physical.GetFeatures2KHR(features); | ||
| 548 | is_float16_supported = float16_int8_features.shaderFloat16; | ||
| 434 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); | 549 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); |
| 435 | } | 550 | } |
| 436 | 551 | ||
| 437 | if (has_ext_subgroup_size_control) { | 552 | if (has_ext_subgroup_size_control) { |
| 438 | const auto features = | 553 | VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; |
| 439 | GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); | 554 | subgroup_features.sType = |
| 440 | const auto properties = | 555 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT; |
| 441 | GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi); | 556 | subgroup_features.pNext = nullptr; |
| 442 | 557 | features.pNext = &subgroup_features; | |
| 443 | is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize; | 558 | physical.GetFeatures2KHR(features); |
| 444 | 559 | ||
| 445 | if (features.subgroupSizeControl && properties.minSubgroupSize <= GuestWarpSize && | 560 | VkPhysicalDeviceSubgroupSizeControlPropertiesEXT subgroup_properties; |
| 446 | properties.maxSubgroupSize >= GuestWarpSize) { | 561 | subgroup_properties.sType = |
| 562 | VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT; | ||
| 563 | subgroup_properties.pNext = nullptr; | ||
| 564 | properties.pNext = &subgroup_properties; | ||
| 565 | physical.GetProperties2KHR(properties); | ||
| 566 | |||
| 567 | is_warp_potentially_bigger = subgroup_properties.maxSubgroupSize > GuestWarpSize; | ||
| 568 | |||
| 569 | if (subgroup_features.subgroupSizeControl && | ||
| 570 | subgroup_properties.minSubgroupSize <= GuestWarpSize && | ||
| 571 | subgroup_properties.maxSubgroupSize >= GuestWarpSize) { | ||
| 447 | extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); | 572 | extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); |
| 448 | guest_warp_stages = properties.requiredSubgroupSizeStages; | 573 | guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages; |
| 449 | } | 574 | } |
| 450 | } else { | 575 | } else { |
| 451 | is_warp_potentially_bigger = true; | 576 | is_warp_potentially_bigger = true; |
| 452 | } | 577 | } |
| 453 | 578 | ||
| 454 | if (has_ext_transform_feedback) { | 579 | if (has_ext_transform_feedback) { |
| 455 | const auto features = | 580 | VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; |
| 456 | GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); | 581 | tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; |
| 457 | const auto properties = | 582 | tfb_features.pNext = nullptr; |
| 458 | GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); | 583 | features.pNext = &tfb_features; |
| 459 | 584 | physical.GetFeatures2KHR(features); | |
| 460 | if (features.transformFeedback && features.geometryStreams && | 585 | |
| 461 | properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && | 586 | VkPhysicalDeviceTransformFeedbackPropertiesEXT tfb_properties; |
| 462 | properties.transformFeedbackQueries && properties.transformFeedbackDraw) { | 587 | tfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; |
| 588 | tfb_properties.pNext = nullptr; | ||
| 589 | properties.pNext = &tfb_properties; | ||
| 590 | physical.GetProperties2KHR(properties); | ||
| 591 | |||
| 592 | if (tfb_features.transformFeedback && tfb_features.geometryStreams && | ||
| 593 | tfb_properties.maxTransformFeedbackStreams >= 4 && | ||
| 594 | tfb_properties.maxTransformFeedbackBuffers && tfb_properties.transformFeedbackQueries && | ||
| 595 | tfb_properties.transformFeedbackDraw) { | ||
| 463 | extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); | 596 | extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); |
| 464 | ext_transform_feedback = true; | 597 | ext_transform_feedback = true; |
| 465 | } | 598 | } |
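
The hunks above trade vulkan.hpp's templated GetFeatures<T>/GetProperties<T> helpers for explicit pNext chains: each extension struct is linked into a VkPhysicalDeviceFeatures2KHR or VkPhysicalDeviceProperties2KHR head, and a single GetFeatures2KHR/GetProperties2KHR call fills the whole chain. For reference, the same pattern in plain C-API terms — a sketch, not this commit's code; it assumes the instance enabled VK_KHR_get_physical_device_properties2 and that physical_device and the vkGetPhysicalDeviceFeatures2KHR entry point are already in hand:

    VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb{};
    tfb.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;

    VkPhysicalDeviceFeatures2KHR features2{};
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR;
    features2.pNext = &tfb; // Chain the extension struct into the query head.

    // One call fills the head and every struct linked through pNext.
    vkGetPhysicalDeviceFeatures2KHR(physical_device, &features2);
    const bool has_tfb = tfb.transformFeedback == VK_TRUE;

Value-initializing the structs with {} zeroes every pNext up front, which the hand-written code in the hunk instead does with explicit pNext = nullptr assignments.
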
| @@ -468,10 +601,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||
| 468 | return extensions; | 601 | return extensions; |
| 469 | } | 602 | } |
| 470 | 603 | ||
| 471 | void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { | 604 | void VKDevice::SetupFamilies(VkSurfaceKHR surface) { |
| 472 | std::optional<u32> graphics_family_, present_family_; | 605 | std::optional<u32> graphics_family_, present_family_; |
| 473 | 606 | ||
| 474 | const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); | 607 | const std::vector queue_family_properties = physical.GetQueueFamilyProperties(); |
| 475 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { | 608 | for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { |
| 476 | if (graphics_family_ && present_family_) | 609 | if (graphics_family_ && present_family_) |
| 477 | break; | 610 | break; |
| @@ -480,10 +613,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK | |||
| 480 | if (queue_family.queueCount == 0) | 613 | if (queue_family.queueCount == 0) |
| 481 | continue; | 614 | continue; |
| 482 | 615 | ||
| 483 | if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) | 616 | if (queue_family.queueFlags & VK_QUEUE_GRAPHICS_BIT) { |
| 484 | graphics_family_ = i; | 617 | graphics_family_ = i; |
| 485 | if (physical.getSurfaceSupportKHR(i, surface, dldi)) | 618 | } |
| 619 | if (physical.GetSurfaceSupportKHR(i, surface)) { | ||
| 486 | present_family_ = i; | 620 | present_family_ = i; |
| 621 | } | ||
| 487 | } | 622 | } |
| 488 | ASSERT(graphics_family_ && present_family_); | 623 | ASSERT(graphics_family_ && present_family_); |
| 489 | 624 | ||
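
SetupFamilies keeps the first family flagged VK_QUEUE_GRAPHICS_BIT and the first one the surface accepts for presentation; on many GPUs both end up as the same index, which is why GetDeviceQueueCreateInfos below deduplicates them. Stripped of the wrapper, the scan is the standard enumerate-then-inspect idiom — a sketch under the assumption that physical_device and surface are valid handles, with error handling omitted:

    #include <optional>
    #include <utility>
    #include <vector>
    #include <vulkan/vulkan.h>

    // Sketch: locate graphics and present queue family indices with the C API.
    std::pair<std::optional<uint32_t>, std::optional<uint32_t>> FindFamilies(
        VkPhysicalDevice physical_device, VkSurfaceKHR surface) {
        uint32_t count = 0;
        vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, nullptr);
        std::vector<VkQueueFamilyProperties> families(count);
        vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, families.data());

        std::optional<uint32_t> graphics;
        std::optional<uint32_t> present;
        for (uint32_t i = 0; i < count; ++i) {
            if (graphics && present) {
                break; // Both roles found; first hits win, as in the code above.
            }
            if (families[i].queueCount == 0) {
                continue; // Family exposes no queues; nothing to pick.
            }
            if (families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
                graphics = i;
            }
            VkBool32 supported = VK_FALSE;
            vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, i, surface, &supported);
            if (supported == VK_TRUE) {
                present = i;
            }
        }
        return {graphics, present};
    }
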
| @@ -491,111 +626,49 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK | |||
| 491 | present_family = *present_family_; | 626 | present_family = *present_family_; |
| 492 | } | 627 | } |
| 493 | 628 | ||
| 494 | void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { | 629 | void VKDevice::SetupFeatures() { |
| 495 | const auto supported_features{physical.getFeatures(dldi)}; | 630 | const auto supported_features{physical.GetFeatures()}; |
| 496 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; | 631 | is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; |
| 497 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); | 632 | is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); |
| 498 | } | 633 | } |
| 499 | 634 | ||
| 500 | void VKDevice::CollectTelemetryParameters() { | 635 | void VKDevice::CollectTelemetryParameters() { |
| 501 | const auto driver = GetProperties<vk::PhysicalDeviceDriverPropertiesKHR>(physical, dld); | 636 | VkPhysicalDeviceDriverPropertiesKHR driver; |
| 637 | driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR; | ||
| 638 | driver.pNext = nullptr; | ||
| 639 | |||
| 640 | VkPhysicalDeviceProperties2KHR properties; | ||
| 641 | properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; | ||
| 642 | properties.pNext = &driver; | ||
| 643 | physical.GetProperties2KHR(properties); | ||
| 644 | |||
| 502 | driver_id = driver.driverID; | 645 | driver_id = driver.driverID; |
| 503 | vendor_name = driver.driverName; | 646 | vendor_name = driver.driverName; |
| 504 | 647 | ||
| 505 | const auto extensions = physical.enumerateDeviceExtensionProperties(nullptr, dld); | 648 | const std::vector extensions = physical.EnumerateDeviceExtensionProperties(); |
| 506 | reported_extensions.reserve(std::size(extensions)); | 649 | reported_extensions.reserve(std::size(extensions)); |
| 507 | for (const auto& extension : extensions) { | 650 | for (const auto& extension : extensions) { |
| 508 | reported_extensions.push_back(extension.extensionName); | 651 | reported_extensions.push_back(extension.extensionName); |
| 509 | } | 652 | } |
| 510 | } | 653 | } |
| 511 | 654 | ||
| 512 | std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { | 655 | std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const { |
| 513 | static const float QUEUE_PRIORITY = 1.0f; | 656 | static constexpr float QUEUE_PRIORITY = 1.0f; |
| 514 | 657 | ||
| 515 | std::set<u32> unique_queue_families = {graphics_family, present_family}; | 658 | std::unordered_set<u32> unique_queue_families = {graphics_family, present_family}; |
| 516 | std::vector<vk::DeviceQueueCreateInfo> queue_cis; | 659 | std::vector<VkDeviceQueueCreateInfo> queue_cis; |
| 517 | 660 | ||
| 518 | for (u32 queue_family : unique_queue_families) | 661 | for (const u32 queue_family : unique_queue_families) { |
| 519 | queue_cis.push_back({{}, queue_family, 1, &QUEUE_PRIORITY}); | 662 | VkDeviceQueueCreateInfo& ci = queue_cis.emplace_back(); |
| 663 | ci.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; | ||
| 664 | ci.pNext = nullptr; | ||
| 665 | ci.flags = 0; | ||
| 666 | ci.queueFamilyIndex = queue_family; | ||
| 667 | ci.queueCount = 1; | ||
| 668 | ci.pQueuePriorities = &QUEUE_PRIORITY; | ||
| 669 | } | ||
| 520 | 670 | ||
| 521 | return queue_cis; | 671 | return queue_cis; |
| 522 | } | 672 | } |
| 523 | 673 | ||
| 524 | std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( | ||
| 525 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { | ||
| 526 | static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, | ||
| 527 | vk::Format::eA8B8G8R8UintPack32, | ||
| 528 | vk::Format::eA8B8G8R8SnormPack32, | ||
| 529 | vk::Format::eA8B8G8R8SrgbPack32, | ||
| 530 | vk::Format::eB5G6R5UnormPack16, | ||
| 531 | vk::Format::eA2B10G10R10UnormPack32, | ||
| 532 | vk::Format::eA1R5G5B5UnormPack16, | ||
| 533 | vk::Format::eR32G32B32A32Sfloat, | ||
| 534 | vk::Format::eR32G32B32A32Uint, | ||
| 535 | vk::Format::eR32G32Sfloat, | ||
| 536 | vk::Format::eR32G32Uint, | ||
| 537 | vk::Format::eR16G16B16A16Uint, | ||
| 538 | vk::Format::eR16G16B16A16Snorm, | ||
| 539 | vk::Format::eR16G16B16A16Unorm, | ||
| 540 | vk::Format::eR16G16Unorm, | ||
| 541 | vk::Format::eR16G16Snorm, | ||
| 542 | vk::Format::eR16G16Sfloat, | ||
| 543 | vk::Format::eR16Unorm, | ||
| 544 | vk::Format::eR8G8B8A8Srgb, | ||
| 545 | vk::Format::eR8G8Unorm, | ||
| 546 | vk::Format::eR8G8Snorm, | ||
| 547 | vk::Format::eR8Unorm, | ||
| 548 | vk::Format::eR8Uint, | ||
| 549 | vk::Format::eB10G11R11UfloatPack32, | ||
| 550 | vk::Format::eR32Sfloat, | ||
| 551 | vk::Format::eR32Uint, | ||
| 552 | vk::Format::eR32Sint, | ||
| 553 | vk::Format::eR16Sfloat, | ||
| 554 | vk::Format::eR16G16B16A16Sfloat, | ||
| 555 | vk::Format::eB8G8R8A8Unorm, | ||
| 556 | vk::Format::eR4G4B4A4UnormPack16, | ||
| 557 | vk::Format::eD32Sfloat, | ||
| 558 | vk::Format::eD16Unorm, | ||
| 559 | vk::Format::eD16UnormS8Uint, | ||
| 560 | vk::Format::eD24UnormS8Uint, | ||
| 561 | vk::Format::eD32SfloatS8Uint, | ||
| 562 | vk::Format::eBc1RgbaUnormBlock, | ||
| 563 | vk::Format::eBc2UnormBlock, | ||
| 564 | vk::Format::eBc3UnormBlock, | ||
| 565 | vk::Format::eBc4UnormBlock, | ||
| 566 | vk::Format::eBc5UnormBlock, | ||
| 567 | vk::Format::eBc5SnormBlock, | ||
| 568 | vk::Format::eBc7UnormBlock, | ||
| 569 | vk::Format::eBc6HUfloatBlock, | ||
| 570 | vk::Format::eBc6HSfloatBlock, | ||
| 571 | vk::Format::eBc1RgbaSrgbBlock, | ||
| 572 | vk::Format::eBc2SrgbBlock, | ||
| 573 | vk::Format::eBc3SrgbBlock, | ||
| 574 | vk::Format::eBc7SrgbBlock, | ||
| 575 | vk::Format::eAstc4x4SrgbBlock, | ||
| 576 | vk::Format::eAstc8x8SrgbBlock, | ||
| 577 | vk::Format::eAstc8x5SrgbBlock, | ||
| 578 | vk::Format::eAstc5x4SrgbBlock, | ||
| 579 | vk::Format::eAstc5x5UnormBlock, | ||
| 580 | vk::Format::eAstc5x5SrgbBlock, | ||
| 581 | vk::Format::eAstc10x8UnormBlock, | ||
| 582 | vk::Format::eAstc10x8SrgbBlock, | ||
| 583 | vk::Format::eAstc6x6UnormBlock, | ||
| 584 | vk::Format::eAstc6x6SrgbBlock, | ||
| 585 | vk::Format::eAstc10x10UnormBlock, | ||
| 586 | vk::Format::eAstc10x10SrgbBlock, | ||
| 587 | vk::Format::eAstc12x12UnormBlock, | ||
| 588 | vk::Format::eAstc12x12SrgbBlock, | ||
| 589 | vk::Format::eAstc8x6UnormBlock, | ||
| 590 | vk::Format::eAstc8x6SrgbBlock, | ||
| 591 | vk::Format::eAstc6x5UnormBlock, | ||
| 592 | vk::Format::eAstc6x5SrgbBlock, | ||
| 593 | vk::Format::eE5B9G9R9UfloatPack32}; | ||
| 594 | std::unordered_map<vk::Format, vk::FormatProperties> format_properties; | ||
| 595 | for (const auto format : formats) { | ||
| 596 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); | ||
| 597 | } | ||
| 598 | return format_properties; | ||
| 599 | } | ||
| 600 | |||
| 601 | } // namespace Vulkan | 674 | } // namespace Vulkan |
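
The deleted GetFormatProperties block above front-loaded one vkGetPhysicalDeviceFormatProperties query per format the renderer touches into an unordered_map, so later IsFormatSupported checks become hash lookups; presumably the new code keeps an equivalent table elsewhere in this commit. Reduced to the C API and a shortened format list, the cache amounts to the following sketch:

    #include <array>
    #include <unordered_map>
    #include <vulkan/vulkan.h>

    // Sketch: pre-query format capabilities once, then answer support checks
    // from the cache. The real table covers roughly 70 formats; three shown.
    std::unordered_map<VkFormat, VkFormatProperties> BuildFormatCache(
        VkPhysicalDevice physical_device) {
        static constexpr std::array formats{
            VK_FORMAT_A8B8G8R8_UNORM_PACK32,
            VK_FORMAT_B5G6R5_UNORM_PACK16,
            VK_FORMAT_D24_UNORM_S8_UINT,
        };
        std::unordered_map<VkFormat, VkFormatProperties> cache;
        for (const VkFormat format : formats) {
            VkFormatProperties properties;
            vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties);
            cache.emplace(format, properties);
        }
        return cache;
    }
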
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 6e656517f..60d64572a 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h | |||
| @@ -8,8 +8,9 @@ | |||
| 8 | #include <string_view> | 8 | #include <string_view> |
| 9 | #include <unordered_map> | 9 | #include <unordered_map> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | |||
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 12 | #include "video_core/renderer_vulkan/declarations.h" | 13 | #include "video_core/renderer_vulkan/wrapper.h" |
| 13 | 14 | ||
| 14 | namespace Vulkan { | 15 | namespace Vulkan { |
| 15 | 16 | ||
| @@ -22,12 +23,12 @@ const u32 GuestWarpSize = 32; | |||
| 22 | /// Handles data specific to a physical device. | 23 | /// Handles data specific to a physical device. |
| 23 | class VKDevice final { | 24 | class VKDevice final { |
| 24 | public: | 25 | public: |
| 25 | explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | 26 | explicit VKDevice(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, |
| 26 | vk::SurfaceKHR surface); | 27 | const vk::InstanceDispatch& dld); |
| 27 | ~VKDevice(); | 28 | ~VKDevice(); |
| 28 | 29 | ||
| 29 | /// Initializes the device. Returns true on success. | 30 | /// Initializes the device. Returns true on success. |
| 30 | bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); | 31 | bool Create(); |
| 31 | 32 | ||
| 32 | /** | 33 | /** |
| 33 | * Returns a format supported by the device for the passed requirements. | 34 | * Returns a format supported by the device for the passed requirements. |
| @@ -36,20 +37,20 @@ public: | |||
| 36 | * @param format_type Format type usage. | 37 | * @param format_type Format type usage. |
| 37 | * @returns A format supported by the device. | 38 | * @returns A format supported by the device. |
| 38 | */ | 39 | */ |
| 39 | vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | 40 | VkFormat GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 40 | FormatType format_type) const; | 41 | FormatType format_type) const; |
| 41 | 42 | ||
| 42 | /// Reports a device loss. | 43 | /// Reports a device loss. |
| 43 | void ReportLoss() const; | 44 | void ReportLoss() const; |
| 44 | 45 | ||
| 45 | /// Returns the dispatch loader with direct function pointers of the device. | 46 | /// Returns the dispatch loader with direct function pointers of the device. |
| 46 | const vk::DispatchLoaderDynamic& GetDispatchLoader() const { | 47 | const vk::DeviceDispatch& GetDispatchLoader() const { |
| 47 | return dld; | 48 | return dld; |
| 48 | } | 49 | } |
| 49 | 50 | ||
| 50 | /// Returns the logical device. | 51 | /// Returns the logical device. |
| 51 | vk::Device GetLogical() const { | 52 | const vk::Device& GetLogical() const { |
| 52 | return logical.get(); | 53 | return logical; |
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | /// Returns the physical device. | 56 | /// Returns the physical device. |
| @@ -79,7 +80,7 @@ public: | |||
| 79 | 80 | ||
| 80 | /// Returns true if the device is integrated with the host CPU. | 81 | /// Returns true if the device is integrated with the host CPU. |
| 81 | bool IsIntegrated() const { | 82 | bool IsIntegrated() const { |
| 82 | return properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu; | 83 | return properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU; |
| 83 | } | 84 | } |
| 84 | 85 | ||
| 85 | /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. | 86 | /// Returns the current Vulkan API version provided in Vulkan-formatted version numbers. |
| @@ -98,27 +99,27 @@ public: | |||
| 98 | } | 99 | } |
| 99 | 100 | ||
| 100 | /// Returns the driver ID. | 101 | /// Returns the driver ID. |
| 101 | vk::DriverIdKHR GetDriverID() const { | 102 | VkDriverIdKHR GetDriverID() const { |
| 102 | return driver_id; | 103 | return driver_id; |
| 103 | } | 104 | } |
| 104 | 105 | ||
| 105 | /// Returns uniform buffer alignment requirement. | 106 | /// Returns uniform buffer alignment requirement. |
| 106 | vk::DeviceSize GetUniformBufferAlignment() const { | 107 | VkDeviceSize GetUniformBufferAlignment() const { |
| 107 | return properties.limits.minUniformBufferOffsetAlignment; | 108 | return properties.limits.minUniformBufferOffsetAlignment; |
| 108 | } | 109 | } |
| 109 | 110 | ||
| 110 | /// Returns storage alignment requirement. | 111 | /// Returns storage alignment requirement. |
| 111 | vk::DeviceSize GetStorageBufferAlignment() const { | 112 | VkDeviceSize GetStorageBufferAlignment() const { |
| 112 | return properties.limits.minStorageBufferOffsetAlignment; | 113 | return properties.limits.minStorageBufferOffsetAlignment; |
| 113 | } | 114 | } |
| 114 | 115 | ||
| 115 | /// Returns the maximum range for storage buffers. | 116 | /// Returns the maximum range for storage buffers. |
| 116 | vk::DeviceSize GetMaxStorageBufferRange() const { | 117 | VkDeviceSize GetMaxStorageBufferRange() const { |
| 117 | return properties.limits.maxStorageBufferRange; | 118 | return properties.limits.maxStorageBufferRange; |
| 118 | } | 119 | } |
| 119 | 120 | ||
| 120 | /// Returns the maximum size for push constants. | 121 | /// Returns the maximum size for push constants. |
| 121 | vk::DeviceSize GetMaxPushConstantsSize() const { | 122 | VkDeviceSize GetMaxPushConstantsSize() const { |
| 122 | return properties.limits.maxPushConstantsSize; | 123 | return properties.limits.maxPushConstantsSize; |
| 123 | } | 124 | } |
| 124 | 125 | ||
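
These alignment accessors exist so buffer suballocators can round offsets up to the device limits; Vulkan guarantees these particular limits are powers of two, so the usual mask trick applies (a generic sketch, not code from this commit):

    // Sketch: round 'offset' up to 'alignment', which Vulkan guarantees is a
    // power of two for the uniform/storage offset alignment limits.
    constexpr VkDeviceSize AlignUp(VkDeviceSize offset, VkDeviceSize alignment) {
        return (offset + alignment - 1) & ~(alignment - 1);
    }
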
| @@ -138,8 +139,8 @@ public: | |||
| 138 | } | 139 | } |
| 139 | 140 | ||
| 140 | /// Returns true if the device can be forced to use the guest warp size. | 141 | /// Returns true if the device can be forced to use the guest warp size. |
| 141 | bool IsGuestWarpSizeSupported(vk::ShaderStageFlagBits stage) const { | 142 | bool IsGuestWarpSizeSupported(VkShaderStageFlagBits stage) const { |
| 142 | return (guest_warp_stages & stage) != vk::ShaderStageFlags{}; | 143 | return guest_warp_stages & stage; |
| 143 | } | 144 | } |
| 144 | 145 | ||
| 145 | /// Returns true if formatless image load is supported. | 146 | /// Returns true if formatless image load is supported. |
| @@ -188,50 +189,44 @@ public: | |||
| 188 | } | 189 | } |
| 189 | 190 | ||
| 190 | /// Checks if the physical device is suitable. | 191 | /// Checks if the physical device is suitable. |
| 191 | static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, | 192 | static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); |
| 192 | vk::SurfaceKHR surface); | ||
| 193 | 193 | ||
| 194 | private: | 194 | private: |
| 195 | /// Loads extensions into a vector and stores available ones in this object. | 195 | /// Loads extensions into a vector and stores available ones in this object. |
| 196 | std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); | 196 | std::vector<const char*> LoadExtensions(); |
| 197 | 197 | ||
| 198 | /// Sets up queue families. | 198 | /// Sets up queue families. |
| 199 | void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); | 199 | void SetupFamilies(VkSurfaceKHR surface); |
| 200 | 200 | ||
| 201 | /// Sets up device features. | 201 | /// Sets up device features. |
| 202 | void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); | 202 | void SetupFeatures(); |
| 203 | 203 | ||
| 204 | /// Collects telemetry information from the device. | 204 | /// Collects telemetry information from the device. |
| 205 | void CollectTelemetryParameters(); | 205 | void CollectTelemetryParameters(); |
| 206 | 206 | ||
| 207 | /// Returns a list of queue initialization descriptors. | 207 | /// Returns a list of queue initialization descriptors. |
| 208 | std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; | 208 | std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; |
| 209 | 209 | ||
| 210 | /// Returns true if ASTC textures are natively supported. | 210 | /// Returns true if ASTC textures are natively supported. |
| 211 | bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, | 211 | bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const; |
| 212 | const vk::DispatchLoaderDynamic& dldi) const; | ||
| 213 | 212 | ||
| 214 | /// Returns true if a format is supported. | 213 | /// Returns true if a format is supported. |
| 215 | bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, | 214 | bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 216 | FormatType format_type) const; | 215 | FormatType format_type) const; |
| 217 | 216 | ||
| 218 | /// Returns the device properties for Vulkan formats. | 217 | vk::DeviceDispatch dld; ///< Device function pointers. |
| 219 | static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( | 218 | vk::PhysicalDevice physical; ///< Physical device. |
| 220 | const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); | 219 | VkPhysicalDeviceProperties properties; ///< Device properties. |
| 221 | 220 | vk::Device logical; ///< Logical device. | |
| 222 | const vk::PhysicalDevice physical; ///< Physical device. | 221 | vk::Queue graphics_queue; ///< Main graphics queue. |
| 223 | vk::DispatchLoaderDynamic dld; ///< Device function pointers. | 222 | vk::Queue present_queue; ///< Main present queue. |
| 224 | vk::PhysicalDeviceProperties properties; ///< Device properties. | 223 | u32 graphics_family{}; ///< Main graphics queue family index. |
| 225 | UniqueDevice logical; ///< Logical device. | 224 | u32 present_family{}; ///< Main present queue family index. |
| 226 | vk::Queue graphics_queue; ///< Main graphics queue. | 225 | VkDriverIdKHR driver_id{}; ///< Driver ID. |
| 227 | vk::Queue present_queue; ///< Main present queue. | 226 | VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. |
| 228 | u32 graphics_family{}; ///< Main graphics queue family index. | 227 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. |
| 229 | u32 present_family{}; ///< Main present queue family index. | 228 | bool is_float16_supported{}; ///< Support for float16 arithmetic. |
| 230 | vk::DriverIdKHR driver_id{}; ///< Driver ID. | 229 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. |
| 231 | vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced. | ||
| 232 | bool is_optimal_astc_supported{}; ///< Support for native ASTC. | ||
| 233 | bool is_float16_supported{}; ///< Support for float16 arithmetic. | ||
| 234 | bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. | ||
| 235 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. | 230 | bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. |
| 236 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. | 231 | bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. |
| 237 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. | 232 | bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. |
| @@ -245,7 +240,7 @@ private: | |||
| 245 | std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. | 240 | std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. |
| 246 | 241 | ||
| 247 | /// Format properties dictionary. | 242 | /// Format properties dictionary. |
| 248 | std::unordered_map<vk::Format, vk::FormatProperties> format_properties; | 243 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; |
| 249 | }; | 244 | }; |
| 250 | 245 | ||
| 251 | } // namespace Vulkan | 246 | } // namespace Vulkan |
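
The member swap above replaces vulkan.hpp's UniqueDevice and vk::DispatchLoaderDynamic with the new wrapper.h types (vk::Device, vk::DeviceDispatch, and so on), whose definitions fall outside this section. As a rough mental model only — DeviceDispatch's contents and the member layout here are assumptions, not the commit's actual interface — an owning handle of that style looks like:

    #include <utility>
    #include <vulkan/vulkan.h>

    // Hypothetical stand-in for the dispatch table wrapper.h actually declares.
    struct DeviceDispatch {
        PFN_vkDestroyShaderModule vkDestroyShaderModule = nullptr;
    };

    // Sketch of the ownership idea: the handle remembers its device and
    // dispatch table and destroys itself, replacing vulkan.hpp's Unique* types.
    class ShaderModule {
    public:
        ShaderModule() = default;
        ShaderModule(VkShaderModule handle_, VkDevice owner_, const DeviceDispatch& dld_)
            : handle{handle_}, owner{owner_}, dld{&dld_} {}
        ShaderModule(ShaderModule&& rhs) noexcept
            : handle{std::exchange(rhs.handle, VK_NULL_HANDLE)}, owner{rhs.owner},
              dld{rhs.dld} {} // Move assignment omitted for brevity.
        ShaderModule(const ShaderModule&) = delete;
        ~ShaderModule() {
            if (handle != VK_NULL_HANDLE) {
                dld->vkDestroyShaderModule(owner, handle, nullptr);
            }
        }

        VkShaderModule operator*() const {
            return handle; // Mirrors the '*modules[...]' accesses in the hunks.
        }

    private:
        VkShaderModule handle = VK_NULL_HANDLE;
        VkDevice owner = VK_NULL_HANDLE;
        const DeviceDispatch* dld = nullptr;
    };
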
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 6a02403c1..b540b838d 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -2,11 +2,13 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 6 | #include <cstring> | ||
| 5 | #include <vector> | 7 | #include <vector> |
| 8 | |||
| 6 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 7 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 8 | #include "common/microprofile.h" | 11 | #include "common/microprofile.h" |
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 12 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 13 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 12 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 14 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| @@ -16,6 +18,7 @@ | |||
| 16 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 18 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 20 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 21 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 19 | 22 | ||
| 20 | namespace Vulkan { | 23 | namespace Vulkan { |
| 21 | 24 | ||
| @@ -23,21 +26,26 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); | |||
| 23 | 26 | ||
| 24 | namespace { | 27 | namespace { |
| 25 | 28 | ||
| 26 | vk::StencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { | 29 | VkStencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { |
| 27 | return vk::StencilOpState(MaxwellToVK::StencilOp(face.action_stencil_fail), | 30 | VkStencilOpState state; |
| 28 | MaxwellToVK::StencilOp(face.action_depth_pass), | 31 | state.failOp = MaxwellToVK::StencilOp(face.action_stencil_fail); |
| 29 | MaxwellToVK::StencilOp(face.action_depth_fail), | 32 | state.passOp = MaxwellToVK::StencilOp(face.action_depth_pass); |
| 30 | MaxwellToVK::ComparisonOp(face.test_func), 0, 0, 0); | 33 | state.depthFailOp = MaxwellToVK::StencilOp(face.action_depth_fail); |
| 34 | state.compareOp = MaxwellToVK::ComparisonOp(face.test_func); | ||
| 35 | state.compareMask = 0; | ||
| 36 | state.writeMask = 0; | ||
| 37 | state.reference = 0; | ||
| 38 | return state; | ||
| 31 | } | 39 | } |
| 32 | 40 | ||
| 33 | bool SupportsPrimitiveRestart(vk::PrimitiveTopology topology) { | 41 | bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) { |
| 34 | static constexpr std::array unsupported_topologies = { | 42 | static constexpr std::array unsupported_topologies = { |
| 35 | vk::PrimitiveTopology::ePointList, | 43 | VK_PRIMITIVE_TOPOLOGY_POINT_LIST, |
| 36 | vk::PrimitiveTopology::eLineList, | 44 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST, |
| 37 | vk::PrimitiveTopology::eTriangleList, | 45 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, |
| 38 | vk::PrimitiveTopology::eLineListWithAdjacency, | 46 | VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY, |
| 39 | vk::PrimitiveTopology::eTriangleListWithAdjacency, | 47 | VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY, |
| 40 | vk::PrimitiveTopology::ePatchList}; | 48 | VK_PRIMITIVE_TOPOLOGY_PATCH_LIST}; |
| 41 | return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), | 49 | return std::find(std::begin(unsupported_topologies), std::end(unsupported_topologies), |
| 42 | topology) == std::end(unsupported_topologies); | 50 | topology) == std::end(unsupported_topologies); |
| 43 | } | 51 | } |
| @@ -49,7 +57,7 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche | |||
| 49 | VKUpdateDescriptorQueue& update_descriptor_queue, | 57 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 50 | VKRenderPassCache& renderpass_cache, | 58 | VKRenderPassCache& renderpass_cache, |
| 51 | const GraphicsPipelineCacheKey& key, | 59 | const GraphicsPipelineCacheKey& key, |
| 52 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings, | 60 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 53 | const SPIRVProgram& program) | 61 | const SPIRVProgram& program) |
| 54 | : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, | 62 | : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, |
| 55 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, | 63 | descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, |
| @@ -63,7 +71,7 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche | |||
| 63 | 71 | ||
| 64 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; | 72 | VKGraphicsPipeline::~VKGraphicsPipeline() = default; |
| 65 | 73 | ||
| 66 | vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { | 74 | VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { |
| 67 | if (!descriptor_template) { | 75 | if (!descriptor_template) { |
| 68 | return {}; | 76 | return {}; |
| 69 | } | 77 | } |
| @@ -72,27 +80,32 @@ vk::DescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { | |||
| 72 | return set; | 80 | return set; |
| 73 | } | 81 | } |
| 74 | 82 | ||
| 75 | UniqueDescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( | 83 | vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout( |
| 76 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const { | 84 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const { |
| 77 | const vk::DescriptorSetLayoutCreateInfo descriptor_set_layout_ci( | 85 | VkDescriptorSetLayoutCreateInfo ci; |
| 78 | {}, static_cast<u32>(bindings.size()), bindings.data()); | 86 | ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; |
| 79 | 87 | ci.pNext = nullptr; | |
| 80 | const auto dev = device.GetLogical(); | 88 | ci.flags = 0; |
| 81 | const auto& dld = device.GetDispatchLoader(); | 89 | ci.bindingCount = bindings.size(); |
| 82 | return dev.createDescriptorSetLayoutUnique(descriptor_set_layout_ci, nullptr, dld); | 90 | ci.pBindings = bindings.data(); |
| 91 | return device.GetLogical().CreateDescriptorSetLayout(ci); | ||
| 83 | } | 92 | } |
| 84 | 93 | ||
| 85 | UniquePipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { | 94 | vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const { |
| 86 | const vk::PipelineLayoutCreateInfo pipeline_layout_ci({}, 1, &*descriptor_set_layout, 0, | 95 | VkPipelineLayoutCreateInfo ci; |
| 87 | nullptr); | 96 | ci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; |
| 88 | const auto dev = device.GetLogical(); | 97 | ci.pNext = nullptr; |
| 89 | const auto& dld = device.GetDispatchLoader(); | 98 | ci.flags = 0; |
| 90 | return dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); | 99 | ci.setLayoutCount = 1; |
| 100 | ci.pSetLayouts = descriptor_set_layout.address(); | ||
| 101 | ci.pushConstantRangeCount = 0; | ||
| 102 | ci.pPushConstantRanges = nullptr; | ||
| 103 | return device.GetLogical().CreatePipelineLayout(ci); | ||
| 91 | } | 104 | } |
| 92 | 105 | ||
| 93 | UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplate( | 106 | vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate( |
| 94 | const SPIRVProgram& program) const { | 107 | const SPIRVProgram& program) const { |
| 95 | std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; | 108 | std::vector<VkDescriptorUpdateTemplateEntry> template_entries; |
| 96 | u32 binding = 0; | 109 | u32 binding = 0; |
| 97 | u32 offset = 0; | 110 | u32 offset = 0; |
| 98 | for (const auto& stage : program) { | 111 | for (const auto& stage : program) { |
| @@ -102,38 +115,47 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat | |||
| 102 | } | 115 | } |
| 103 | if (template_entries.empty()) { | 116 | if (template_entries.empty()) { |
| 104 | // If the shader doesn't use descriptor sets, skip template creation. | 117 | // If the shader doesn't use descriptor sets, skip template creation. |
| 105 | return UniqueDescriptorUpdateTemplate{}; | 118 | return {}; |
| 106 | } | 119 | } |
| 107 | 120 | ||
| 108 | const vk::DescriptorUpdateTemplateCreateInfo template_ci( | 121 | VkDescriptorUpdateTemplateCreateInfoKHR ci; |
| 109 | {}, static_cast<u32>(template_entries.size()), template_entries.data(), | 122 | ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR; |
| 110 | vk::DescriptorUpdateTemplateType::eDescriptorSet, *descriptor_set_layout, | 123 | ci.pNext = nullptr; |
| 111 | vk::PipelineBindPoint::eGraphics, *layout, DESCRIPTOR_SET); | 124 | ci.flags = 0; |
| 112 | 125 | ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()); | |
| 113 | const auto dev = device.GetLogical(); | 126 | ci.pDescriptorUpdateEntries = template_entries.data(); |
| 114 | const auto& dld = device.GetDispatchLoader(); | 127 | ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; |
| 115 | return dev.createDescriptorUpdateTemplateUnique(template_ci, nullptr, dld); | 128 | ci.descriptorSetLayout = *descriptor_set_layout; |
| 129 | ci.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; | ||
| 130 | ci.pipelineLayout = *layout; | ||
| 131 | ci.set = DESCRIPTOR_SET; | ||
| 132 | return device.GetLogical().CreateDescriptorUpdateTemplateKHR(ci); | ||
| 116 | } | 133 | } |
| 117 | 134 | ||
| 118 | std::vector<UniqueShaderModule> VKGraphicsPipeline::CreateShaderModules( | 135 | std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( |
| 119 | const SPIRVProgram& program) const { | 136 | const SPIRVProgram& program) const { |
| 120 | std::vector<UniqueShaderModule> modules; | 137 | VkShaderModuleCreateInfo ci; |
| 121 | const auto dev = device.GetLogical(); | 138 | ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; |
| 122 | const auto& dld = device.GetDispatchLoader(); | 139 | ci.pNext = nullptr; |
| 140 | ci.flags = 0; | ||
| 141 | |||
| 142 | std::vector<vk::ShaderModule> modules; | ||
| 143 | modules.reserve(Maxwell::MaxShaderStage); | ||
| 123 | for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { | 144 | for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) { |
| 124 | const auto& stage = program[i]; | 145 | const auto& stage = program[i]; |
| 125 | if (!stage) { | 146 | if (!stage) { |
| 126 | continue; | 147 | continue; |
| 127 | } | 148 | } |
| 128 | const vk::ShaderModuleCreateInfo module_ci({}, stage->code.size() * sizeof(u32), | 149 | |
| 129 | stage->code.data()); | 150 | ci.codeSize = stage->code.size() * sizeof(u32); |
| 130 | modules.emplace_back(dev.createShaderModuleUnique(module_ci, nullptr, dld)); | 151 | ci.pCode = stage->code.data(); |
| 152 | modules.push_back(device.GetLogical().CreateShaderModule(ci)); | ||
| 131 | } | 153 | } |
| 132 | return modules; | 154 | return modules; |
| 133 | } | 155 | } |
| 134 | 156 | ||
| 135 | UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, | 157 | vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, |
| 136 | const SPIRVProgram& program) const { | 158 | const SPIRVProgram& program) const { |
| 137 | const auto& vi = fixed_state.vertex_input; | 159 | const auto& vi = fixed_state.vertex_input; |
| 138 | const auto& ia = fixed_state.input_assembly; | 160 | const auto& ia = fixed_state.input_assembly; |
| 139 | const auto& ds = fixed_state.depth_stencil; | 161 | const auto& ds = fixed_state.depth_stencil; |
| @@ -141,19 +163,26 @@ UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& render | |||
| 141 | const auto& ts = fixed_state.tessellation; | 163 | const auto& ts = fixed_state.tessellation; |
| 142 | const auto& rs = fixed_state.rasterizer; | 164 | const auto& rs = fixed_state.rasterizer; |
| 143 | 165 | ||
| 144 | std::vector<vk::VertexInputBindingDescription> vertex_bindings; | 166 | std::vector<VkVertexInputBindingDescription> vertex_bindings; |
| 145 | std::vector<vk::VertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; | 167 | std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors; |
| 146 | for (std::size_t i = 0; i < vi.num_bindings; ++i) { | 168 | for (std::size_t i = 0; i < vi.num_bindings; ++i) { |
| 147 | const auto& binding = vi.bindings[i]; | 169 | const auto& binding = vi.bindings[i]; |
| 148 | const bool instanced = binding.divisor != 0; | 170 | const bool instanced = binding.divisor != 0; |
| 149 | const auto rate = instanced ? vk::VertexInputRate::eInstance : vk::VertexInputRate::eVertex; | 171 | const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; |
| 150 | vertex_bindings.emplace_back(binding.index, binding.stride, rate); | 172 | |
| 173 | auto& vertex_binding = vertex_bindings.emplace_back(); | ||
| 174 | vertex_binding.binding = binding.index; | ||
| 175 | vertex_binding.stride = binding.stride; | ||
| 176 | vertex_binding.inputRate = rate; | ||
| 177 | |||
| 151 | if (instanced) { | 178 | if (instanced) { |
| 152 | vertex_binding_divisors.emplace_back(binding.index, binding.divisor); | 179 | auto& binding_divisor = vertex_binding_divisors.emplace_back(); |
| 180 | binding_divisor.binding = binding.index; | ||
| 181 | binding_divisor.divisor = binding.divisor; | ||
| 153 | } | 182 | } |
| 154 | } | 183 | } |
| 155 | 184 | ||
| 156 | std::vector<vk::VertexInputAttributeDescription> vertex_attributes; | 185 | std::vector<VkVertexInputAttributeDescription> vertex_attributes; |
| 157 | const auto& input_attributes = program[0]->entries.attributes; | 186 | const auto& input_attributes = program[0]->entries.attributes; |
| 158 | for (std::size_t i = 0; i < vi.num_attributes; ++i) { | 187 | for (std::size_t i = 0; i < vi.num_attributes; ++i) { |
| 159 | const auto& attribute = vi.attributes[i]; | 188 | const auto& attribute = vi.attributes[i]; |
| @@ -161,109 +190,194 @@ UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& render | |||
| 161 | // Skip attributes not used by the vertex shaders. | 190 | // Skip attributes not used by the vertex shaders. |
| 162 | continue; | 191 | continue; |
| 163 | } | 192 | } |
| 164 | vertex_attributes.emplace_back(attribute.index, attribute.buffer, | 193 | auto& vertex_attribute = vertex_attributes.emplace_back(); |
| 165 | MaxwellToVK::VertexFormat(attribute.type, attribute.size), | 194 | vertex_attribute.location = attribute.index; |
| 166 | attribute.offset); | 195 | vertex_attribute.binding = attribute.buffer; |
| 196 | vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size); | ||
| 197 | vertex_attribute.offset = attribute.offset; | ||
| 167 | } | 198 | } |
| 168 | 199 | ||
| 169 | vk::PipelineVertexInputStateCreateInfo vertex_input_ci( | 200 | VkPipelineVertexInputStateCreateInfo vertex_input_ci; |
| 170 | {}, static_cast<u32>(vertex_bindings.size()), vertex_bindings.data(), | 201 | vertex_input_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; |
| 171 | static_cast<u32>(vertex_attributes.size()), vertex_attributes.data()); | 202 | vertex_input_ci.pNext = nullptr; |
| 172 | 203 | vertex_input_ci.flags = 0; | |
| 173 | const vk::PipelineVertexInputDivisorStateCreateInfoEXT vertex_input_divisor_ci( | 204 | vertex_input_ci.vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()); |
| 174 | static_cast<u32>(vertex_binding_divisors.size()), vertex_binding_divisors.data()); | 205 | vertex_input_ci.pVertexBindingDescriptions = vertex_bindings.data(); |
| 206 | vertex_input_ci.vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()); | ||
| 207 | vertex_input_ci.pVertexAttributeDescriptions = vertex_attributes.data(); | ||
| 208 | |||
| 209 | VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci; | ||
| 210 | input_divisor_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; | ||
| 211 | input_divisor_ci.pNext = nullptr; | ||
| 212 | input_divisor_ci.vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()); | ||
| 213 | input_divisor_ci.pVertexBindingDivisors = vertex_binding_divisors.data(); | ||
| 175 | if (!vertex_binding_divisors.empty()) { | 214 | if (!vertex_binding_divisors.empty()) { |
| 176 | vertex_input_ci.pNext = &vertex_input_divisor_ci; | 215 | vertex_input_ci.pNext = &input_divisor_ci; |
| 177 | } | 216 | } |
| 178 | 217 | ||
| 179 | const auto primitive_topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); | 218 | VkPipelineInputAssemblyStateCreateInfo input_assembly_ci; |
| 180 | const vk::PipelineInputAssemblyStateCreateInfo input_assembly_ci( | 219 | input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; |
| 181 | {}, primitive_topology, | 220 | input_assembly_ci.pNext = nullptr; |
| 182 | ia.primitive_restart_enable && SupportsPrimitiveRestart(primitive_topology)); | 221 | input_assembly_ci.flags = 0; |
| 183 | 222 | input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); | |
| 184 | const vk::PipelineTessellationStateCreateInfo tessellation_ci({}, ts.patch_control_points); | 223 | input_assembly_ci.primitiveRestartEnable = |
| 185 | 224 | ia.primitive_restart_enable && SupportsPrimitiveRestart(input_assembly_ci.topology); | |
| 186 | const vk::PipelineViewportStateCreateInfo viewport_ci({}, Maxwell::NumViewports, nullptr, | 225 | |
| 187 | Maxwell::NumViewports, nullptr); | 226 | VkPipelineTessellationStateCreateInfo tessellation_ci; |
| 188 | 227 | tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; | |
| 189 | // TODO(Rodrigo): Find out what the default register value for front face is | 228 | tessellation_ci.pNext = nullptr; |
| 190 | const vk::PipelineRasterizationStateCreateInfo rasterizer_ci( | 229 | tessellation_ci.flags = 0; |
| 191 | {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill, | 230 | tessellation_ci.patchControlPoints = ts.patch_control_points; |
| 192 | rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone, | 231 | |
| 193 | MaxwellToVK::FrontFace(rs.front_face), rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f); | 232 | VkPipelineViewportStateCreateInfo viewport_ci; |
| 194 | 233 | viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; | |
| 195 | const vk::PipelineMultisampleStateCreateInfo multisampling_ci( | 234 | viewport_ci.pNext = nullptr; |
| 196 | {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false); | 235 | viewport_ci.flags = 0; |
| 197 | 236 | viewport_ci.viewportCount = Maxwell::NumViewports; | |
| 198 | const vk::CompareOp depth_test_compare = ds.depth_test_enable | 237 | viewport_ci.pViewports = nullptr; |
| 199 | ? MaxwellToVK::ComparisonOp(ds.depth_test_function) | 238 | viewport_ci.scissorCount = Maxwell::NumViewports; |
| 200 | : vk::CompareOp::eAlways; | 239 | viewport_ci.pScissors = nullptr; |
| 201 | 240 | ||
| 202 | const vk::PipelineDepthStencilStateCreateInfo depth_stencil_ci( | 241 | VkPipelineRasterizationStateCreateInfo rasterization_ci; |
| 203 | {}, ds.depth_test_enable, ds.depth_write_enable, depth_test_compare, ds.depth_bounds_enable, | 242 | rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; |
| 204 | ds.stencil_enable, GetStencilFaceState(ds.front_stencil), | 243 | rasterization_ci.pNext = nullptr; |
| 205 | GetStencilFaceState(ds.back_stencil), 0.0f, 0.0f); | 244 | rasterization_ci.flags = 0; |
| 206 | 245 | rasterization_ci.depthClampEnable = rs.depth_clamp_enable; | |
| 207 | std::array<vk::PipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | 246 | rasterization_ci.rasterizerDiscardEnable = VK_FALSE; |
| 247 | rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; | ||
| 248 | rasterization_ci.cullMode = | ||
| 249 | rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : VK_CULL_MODE_NONE; | ||
| 250 | rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.front_face); | ||
| 251 | rasterization_ci.depthBiasEnable = rs.depth_bias_enable; | ||
| 252 | rasterization_ci.depthBiasConstantFactor = 0.0f; | ||
| 253 | rasterization_ci.depthBiasClamp = 0.0f; | ||
| 254 | rasterization_ci.depthBiasSlopeFactor = 0.0f; | ||
| 255 | rasterization_ci.lineWidth = 1.0f; | ||
| 256 | |||
| 257 | VkPipelineMultisampleStateCreateInfo multisample_ci; | ||
| 258 | multisample_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; | ||
| 259 | multisample_ci.pNext = nullptr; | ||
| 260 | multisample_ci.flags = 0; | ||
| 261 | multisample_ci.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; | ||
| 262 | multisample_ci.sampleShadingEnable = VK_FALSE; | ||
| 263 | multisample_ci.minSampleShading = 0.0f; | ||
| 264 | multisample_ci.pSampleMask = nullptr; | ||
| 265 | multisample_ci.alphaToCoverageEnable = VK_FALSE; | ||
| 266 | multisample_ci.alphaToOneEnable = VK_FALSE; | ||
| 267 | |||
| 268 | VkPipelineDepthStencilStateCreateInfo depth_stencil_ci; | ||
| 269 | depth_stencil_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; | ||
| 270 | depth_stencil_ci.pNext = nullptr; | ||
| 271 | depth_stencil_ci.flags = 0; | ||
| 272 | depth_stencil_ci.depthTestEnable = ds.depth_test_enable; | ||
| 273 | depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; | ||
| 274 | depth_stencil_ci.depthCompareOp = ds.depth_test_enable | ||
| 275 | ? MaxwellToVK::ComparisonOp(ds.depth_test_function) | ||
| 276 | : VK_COMPARE_OP_ALWAYS; | ||
| 277 | depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; | ||
| 278 | depth_stencil_ci.stencilTestEnable = ds.stencil_enable; | ||
| 279 | depth_stencil_ci.front = GetStencilFaceState(ds.front_stencil); | ||
| 280 | depth_stencil_ci.back = GetStencilFaceState(ds.back_stencil); | ||
| 281 | depth_stencil_ci.minDepthBounds = 0.0f; | ||
| 282 | depth_stencil_ci.maxDepthBounds = 0.0f; | ||
| 283 | |||
| 284 | std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments; | ||
| 208 | const std::size_t num_attachments = | 285 | const std::size_t num_attachments = |
| 209 | std::min(cd.attachments_count, renderpass_params.color_attachments.size()); | 286 | std::min(cd.attachments_count, renderpass_params.color_attachments.size()); |
| 210 | for (std::size_t i = 0; i < num_attachments; ++i) { | 287 | for (std::size_t i = 0; i < num_attachments; ++i) { |
| 211 | constexpr std::array component_table{ | 288 | static constexpr std::array component_table = { |
| 212 | vk::ColorComponentFlagBits::eR, vk::ColorComponentFlagBits::eG, | 289 | VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, |
| 213 | vk::ColorComponentFlagBits::eB, vk::ColorComponentFlagBits::eA}; | 290 | VK_COLOR_COMPONENT_A_BIT}; |
| 214 | const auto& blend = cd.attachments[i]; | 291 | const auto& blend = cd.attachments[i]; |
| 215 | 292 | ||
| 216 | vk::ColorComponentFlags color_components{}; | 293 | VkColorComponentFlags color_components = 0; |
| 217 | for (std::size_t j = 0; j < component_table.size(); ++j) { | 294 | for (std::size_t j = 0; j < component_table.size(); ++j) { |
| 218 | if (blend.components[j]) | 295 | if (blend.components[j]) { |
| 219 | color_components |= component_table[j]; | 296 | color_components |= component_table[j]; |
| 297 | } | ||
| 220 | } | 298 | } |
| 221 | 299 | ||
| 222 | cb_attachments[i] = vk::PipelineColorBlendAttachmentState( | 300 | VkPipelineColorBlendAttachmentState& attachment = cb_attachments[i]; |
| 223 | blend.enable, MaxwellToVK::BlendFactor(blend.src_rgb_func), | 301 | attachment.blendEnable = blend.enable; |
| 224 | MaxwellToVK::BlendFactor(blend.dst_rgb_func), | 302 | attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.src_rgb_func); |
| 225 | MaxwellToVK::BlendEquation(blend.rgb_equation), | 303 | attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.dst_rgb_func); |
| 226 | MaxwellToVK::BlendFactor(blend.src_a_func), MaxwellToVK::BlendFactor(blend.dst_a_func), | 304 | attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.rgb_equation); |
| 227 | MaxwellToVK::BlendEquation(blend.a_equation), color_components); | 305 | attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.src_a_func); |
| 306 | attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.dst_a_func); | ||
| 307 | attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.a_equation); | ||
| 308 | attachment.colorWriteMask = color_components; | ||
| 228 | } | 309 | } |
| 229 | const vk::PipelineColorBlendStateCreateInfo color_blending_ci({}, false, vk::LogicOp::eCopy, | 310 | |
| 230 | static_cast<u32>(num_attachments), | 311 | VkPipelineColorBlendStateCreateInfo color_blend_ci; |
| 231 | cb_attachments.data(), {}); | 312 | color_blend_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; |
| 232 | 313 | color_blend_ci.pNext = nullptr; | |
| 233 | constexpr std::array dynamic_states = { | 314 | color_blend_ci.flags = 0; |
| 234 | vk::DynamicState::eViewport, vk::DynamicState::eScissor, | 315 | color_blend_ci.logicOpEnable = VK_FALSE; |
| 235 | vk::DynamicState::eDepthBias, vk::DynamicState::eBlendConstants, | 316 | color_blend_ci.logicOp = VK_LOGIC_OP_COPY; |
| 236 | vk::DynamicState::eDepthBounds, vk::DynamicState::eStencilCompareMask, | 317 | color_blend_ci.attachmentCount = static_cast<u32>(num_attachments); |
| 237 | vk::DynamicState::eStencilWriteMask, vk::DynamicState::eStencilReference}; | 318 | color_blend_ci.pAttachments = cb_attachments.data(); |
| 238 | const vk::PipelineDynamicStateCreateInfo dynamic_state_ci( | 319 | std::memset(color_blend_ci.blendConstants, 0, sizeof(color_blend_ci.blendConstants)); |
| 239 | {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data()); | 320 | |
| 240 | 321 | static constexpr std::array dynamic_states = { | |
| 241 | vk::PipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; | 322 | VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, |
| 323 | VK_DYNAMIC_STATE_DEPTH_BIAS, VK_DYNAMIC_STATE_BLEND_CONSTANTS, | ||
| 324 | VK_DYNAMIC_STATE_DEPTH_BOUNDS, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, | ||
| 325 | VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE}; | ||
| 326 | |||
| 327 | VkPipelineDynamicStateCreateInfo dynamic_state_ci; | ||
| 328 | dynamic_state_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; | ||
| 329 | dynamic_state_ci.pNext = nullptr; | ||
| 330 | dynamic_state_ci.flags = 0; | ||
| 331 | dynamic_state_ci.dynamicStateCount = static_cast<u32>(dynamic_states.size()); | ||
| 332 | dynamic_state_ci.pDynamicStates = dynamic_states.data(); | ||
| 333 | |||
| 334 | VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci; | ||
| 335 | subgroup_size_ci.sType = | ||
| 336 | VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT; | ||
| 337 | subgroup_size_ci.pNext = nullptr; | ||
| 242 | subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; | 338 | subgroup_size_ci.requiredSubgroupSize = GuestWarpSize; |
| 243 | 339 | ||
| 244 | std::vector<vk::PipelineShaderStageCreateInfo> shader_stages; | 340 | std::vector<VkPipelineShaderStageCreateInfo> shader_stages; |
| 245 | std::size_t module_index = 0; | 341 | std::size_t module_index = 0; |
| 246 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 342 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 247 | if (!program[stage]) { | 343 | if (!program[stage]) { |
| 248 | continue; | 344 | continue; |
| 249 | } | 345 | } |
| 250 | const auto stage_enum = static_cast<Tegra::Engines::ShaderType>(stage); | 346 | VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back(); |
| 251 | const auto vk_stage = MaxwellToVK::ShaderStage(stage_enum); | 347 | stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; |
| 252 | auto& stage_ci = shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags{}, vk_stage, | 348 | stage_ci.pNext = nullptr; |
| 253 | *modules[module_index++], "main", nullptr); | 349 | stage_ci.flags = 0; |
| 254 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(vk_stage)) { | 350 | stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage)); |
| 351 | stage_ci.module = *modules[module_index++]; | ||
| 352 | stage_ci.pName = "main"; | ||
| 353 | stage_ci.pSpecializationInfo = nullptr; | ||
| 354 | |||
| 355 | if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) { | ||
| 255 | stage_ci.pNext = &subgroup_size_ci; | 356 | stage_ci.pNext = &subgroup_size_ci; |
| 256 | } | 357 | } |
| 257 | } | 358 | } |
| 258 | 359 | ||
| 259 | const vk::GraphicsPipelineCreateInfo create_info( | 360 | VkGraphicsPipelineCreateInfo ci; |
| 260 | {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input_ci, | 361 | ci.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; |
| 261 | &input_assembly_ci, &tessellation_ci, &viewport_ci, &rasterizer_ci, &multisampling_ci, | 362 | ci.pNext = nullptr; |
| 262 | &depth_stencil_ci, &color_blending_ci, &dynamic_state_ci, *layout, renderpass, 0, {}, 0); | 363 | ci.flags = 0; |
| 263 | 364 | ci.stageCount = static_cast<u32>(shader_stages.size()); | |
| 264 | const auto dev = device.GetLogical(); | 365 | ci.pStages = shader_stages.data(); |
| 265 | const auto& dld = device.GetDispatchLoader(); | 366 | ci.pVertexInputState = &vertex_input_ci; |
| 266 | return dev.createGraphicsPipelineUnique(nullptr, create_info, nullptr, dld); | 367 | ci.pInputAssemblyState = &input_assembly_ci; |
| 368 | ci.pTessellationState = &tessellation_ci; | ||
| 369 | ci.pViewportState = &viewport_ci; | ||
| 370 | ci.pRasterizationState = &rasterization_ci; | ||
| 371 | ci.pMultisampleState = &multisample_ci; | ||
| 372 | ci.pDepthStencilState = &depth_stencil_ci; | ||
| 373 | ci.pColorBlendState = &color_blend_ci; | ||
| 374 | ci.pDynamicState = &dynamic_state_ci; | ||
| 375 | ci.layout = *layout; | ||
| 376 | ci.renderPass = renderpass; | ||
| 377 | ci.subpass = 0; | ||
| 378 | ci.basePipelineHandle = nullptr; | ||
| 379 | ci.basePipelineIndex = 0; | ||
| 380 | return device.GetLogical().CreateGraphicsPipeline(ci); | ||
| 267 | } | 381 | } |
| 268 | 382 | ||
| 269 | } // namespace Vulkan | 383 | } // namespace Vulkan |
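The hunk above shows the core pattern of this migration: vulkan.hpp's positional constructors are replaced by C structs filled field by field, each tagged with its sType, with extension structs attached through the pNext chain. A minimal sketch of that pattern, not taken from the patch (the vertex stage, the "main" entry point, and the subgroup size of 32 are stand-ins):

    #include <vulkan/vulkan.h>

    // The extension struct is passed in by reference so it outlives the create-info
    // that points at it; Vulkan walks the pNext chain when the pipeline is created.
    VkPipelineShaderStageCreateInfo MakeStage(
        VkShaderModule module,
        VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT& subgroup) {
        subgroup.sType =
            VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT;
        subgroup.pNext = nullptr;
        subgroup.requiredSubgroupSize = 32;

        VkPipelineShaderStageCreateInfo stage{};
        stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
        stage.pNext = &subgroup; // extension struct rides on the pNext chain
        stage.stage = VK_SHADER_STAGE_VERTEX_BIT;
        stage.module = module;
        stage.pName = "main";
        return stage;
    }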
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 4f5e4ea2d..7aba70960 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -11,12 +11,12 @@ | |||
| 11 | #include <vector> | 11 | #include <vector> |
| 12 | 12 | ||
| 13 | #include "video_core/engines/maxwell_3d.h" | 13 | #include "video_core/engines/maxwell_3d.h" |
| 14 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 15 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 14 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 16 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 15 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 17 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 16 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| 18 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 17 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 19 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 18 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 19 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 20 | 20 | ||
| 21 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | 22 | ||
| @@ -39,52 +39,52 @@ public: | |||
| 39 | VKUpdateDescriptorQueue& update_descriptor_queue, | 39 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 40 | VKRenderPassCache& renderpass_cache, | 40 | VKRenderPassCache& renderpass_cache, |
| 41 | const GraphicsPipelineCacheKey& key, | 41 | const GraphicsPipelineCacheKey& key, |
| 42 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings, | 42 | vk::Span<VkDescriptorSetLayoutBinding> bindings, |
| 43 | const SPIRVProgram& program); | 43 | const SPIRVProgram& program); |
| 44 | ~VKGraphicsPipeline(); | 44 | ~VKGraphicsPipeline(); |
| 45 | 45 | ||
| 46 | vk::DescriptorSet CommitDescriptorSet(); | 46 | VkDescriptorSet CommitDescriptorSet(); |
| 47 | 47 | ||
| 48 | vk::Pipeline GetHandle() const { | 48 | VkPipeline GetHandle() const { |
| 49 | return *pipeline; | 49 | return *pipeline; |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | vk::PipelineLayout GetLayout() const { | 52 | VkPipelineLayout GetLayout() const { |
| 53 | return *layout; | 53 | return *layout; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | vk::RenderPass GetRenderPass() const { | 56 | VkRenderPass GetRenderPass() const { |
| 57 | return renderpass; | 57 | return renderpass; |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | private: | 60 | private: |
| 61 | UniqueDescriptorSetLayout CreateDescriptorSetLayout( | 61 | vk::DescriptorSetLayout CreateDescriptorSetLayout( |
| 62 | const std::vector<vk::DescriptorSetLayoutBinding>& bindings) const; | 62 | vk::Span<VkDescriptorSetLayoutBinding> bindings) const; |
| 63 | 63 | ||
| 64 | UniquePipelineLayout CreatePipelineLayout() const; | 64 | vk::PipelineLayout CreatePipelineLayout() const; |
| 65 | 65 | ||
| 66 | UniqueDescriptorUpdateTemplate CreateDescriptorUpdateTemplate( | 66 | vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate( |
| 67 | const SPIRVProgram& program) const; | 67 | const SPIRVProgram& program) const; |
| 68 | 68 | ||
| 69 | std::vector<UniqueShaderModule> CreateShaderModules(const SPIRVProgram& program) const; | 69 | std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const; |
| 70 | 70 | ||
| 71 | UniquePipeline CreatePipeline(const RenderPassParams& renderpass_params, | 71 | vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params, |
| 72 | const SPIRVProgram& program) const; | 72 | const SPIRVProgram& program) const; |
| 73 | 73 | ||
| 74 | const VKDevice& device; | 74 | const VKDevice& device; |
| 75 | VKScheduler& scheduler; | 75 | VKScheduler& scheduler; |
| 76 | const FixedPipelineState fixed_state; | 76 | const FixedPipelineState fixed_state; |
| 77 | const u64 hash; | 77 | const u64 hash; |
| 78 | 78 | ||
| 79 | UniqueDescriptorSetLayout descriptor_set_layout; | 79 | vk::DescriptorSetLayout descriptor_set_layout; |
| 80 | DescriptorAllocator descriptor_allocator; | 80 | DescriptorAllocator descriptor_allocator; |
| 81 | VKUpdateDescriptorQueue& update_descriptor_queue; | 81 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 82 | UniquePipelineLayout layout; | 82 | vk::PipelineLayout layout; |
| 83 | UniqueDescriptorUpdateTemplate descriptor_template; | 83 | vk::DescriptorUpdateTemplateKHR descriptor_template; |
| 84 | std::vector<UniqueShaderModule> modules; | 84 | std::vector<vk::ShaderModule> modules; |
| 85 | 85 | ||
| 86 | vk::RenderPass renderpass; | 86 | VkRenderPass renderpass; |
| 87 | UniquePipeline pipeline; | 87 | vk::Pipeline pipeline; |
| 88 | }; | 88 | }; |
| 89 | 89 | ||
| 90 | } // namespace Vulkan | 90 | } // namespace Vulkan |
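The Unique* aliases from vulkan.hpp give way to owning handles from wrapper.h (vk::Pipeline, vk::PipelineLayout, and so on). wrapper.h is outside this excerpt; conceptually each handle is a small RAII owner over the raw Vulkan object, along the lines of this simplified sketch (the real wrapper also carries a dispatch table and full move semantics):

    #include <utility>
    #include <vulkan/vulkan.h>

    class Pipeline {
    public:
        Pipeline() = default;
        Pipeline(VkDevice device, VkPipeline pipeline) : device{device}, pipeline{pipeline} {}
        Pipeline(Pipeline&& rhs) noexcept
            : device{rhs.device}, pipeline{std::exchange(rhs.pipeline, VK_NULL_HANDLE)} {}
        Pipeline(const Pipeline&) = delete;
        ~Pipeline() {
            if (pipeline != VK_NULL_HANDLE) {
                vkDestroyPipeline(device, pipeline, nullptr); // owned: destroy on scope exit
            }
        }
        VkPipeline operator*() const { return pipeline; } // matches the *pipeline usage above
        explicit operator bool() const { return pipeline != VK_NULL_HANDLE; }

    private:
        VkDevice device = VK_NULL_HANDLE;
        VkPipeline pipeline = VK_NULL_HANDLE;
    };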
diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp index 4bcbef959..9bceb3861 100644 --- a/src/video_core/renderer_vulkan/vk_image.cpp +++ b/src/video_core/renderer_vulkan/vk_image.cpp | |||
| @@ -6,22 +6,21 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | 7 | ||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_device.h" | 9 | #include "video_core/renderer_vulkan/vk_device.h" |
| 11 | #include "video_core/renderer_vulkan/vk_image.h" | 10 | #include "video_core/renderer_vulkan/vk_image.h" |
| 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| 16 | VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, | 16 | VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, const VkImageCreateInfo& image_ci, |
| 17 | const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask) | 17 | VkImageAspectFlags aspect_mask) |
| 18 | : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask}, | 18 | : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask}, |
| 19 | image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} { | 19 | image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} { |
| 20 | UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0, | 20 | UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0, |
| 21 | "Queue family tracking is not implemented"); | 21 | "Queue family tracking is not implemented"); |
| 22 | 22 | ||
| 23 | const auto dev = device.GetLogical(); | 23 | image = device.GetLogical().CreateImage(image_ci); |
| 24 | image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader()); | ||
| 25 | 24 | ||
| 26 | const u32 num_ranges = image_num_layers * image_num_levels; | 25 | const u32 num_ranges = image_num_layers * image_num_levels; |
| 27 | barriers.resize(num_ranges); | 26 | barriers.resize(num_ranges); |
| @@ -31,8 +30,8 @@ VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, | |||
| 31 | VKImage::~VKImage() = default; | 30 | VKImage::~VKImage() = default; |
| 32 | 31 | ||
| 33 | void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | 32 | void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, |
| 34 | vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, | 33 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, |
| 35 | vk::ImageLayout new_layout) { | 34 | VkImageLayout new_layout) { |
| 36 | if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { | 35 | if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { |
| 37 | return; | 36 | return; |
| 38 | } | 37 | } |
| @@ -43,9 +42,21 @@ void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num | |||
| 43 | const u32 layer = base_layer + layer_it; | 42 | const u32 layer = base_layer + layer_it; |
| 44 | const u32 level = base_level + level_it; | 43 | const u32 level = base_level + level_it; |
| 45 | auto& state = GetSubrangeState(layer, level); | 44 | auto& state = GetSubrangeState(layer, level); |
| 46 | barriers[cursor] = vk::ImageMemoryBarrier( | 45 | auto& barrier = barriers[cursor]; |
| 47 | state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED, | 46 | barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; |
| 48 | VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1}); | 47 | barrier.pNext = nullptr; |
| 48 | barrier.srcAccessMask = state.access; | ||
| 49 | barrier.dstAccessMask = new_access; | ||
| 50 | barrier.oldLayout = state.layout; | ||
| 51 | barrier.newLayout = new_layout; | ||
| 52 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 53 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 54 | barrier.image = *image; | ||
| 55 | barrier.subresourceRange.aspectMask = aspect_mask; | ||
| 56 | barrier.subresourceRange.baseMipLevel = level; | ||
| 57 | barrier.subresourceRange.levelCount = 1; | ||
| 58 | barrier.subresourceRange.baseArrayLayer = layer; | ||
| 59 | barrier.subresourceRange.layerCount = 1; | ||
| 49 | state.access = new_access; | 60 | state.access = new_access; |
| 50 | state.layout = new_layout; | 61 | state.layout = new_layout; |
| 51 | } | 62 | } |
| @@ -53,16 +64,16 @@ void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num | |||
| 53 | 64 | ||
| 54 | scheduler.RequestOutsideRenderPassOperationContext(); | 65 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 55 | 66 | ||
| 56 | scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) { | 67 | scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) { |
| 57 | // TODO(Rodrigo): Implement a way to use the latest stage across subresources. | 68 | // TODO(Rodrigo): Implement a way to use the latest stage across subresources. |
| 58 | constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands; | 69 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |
| 59 | cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr, | 70 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {}, |
| 60 | static_cast<u32>(cursor), barriers.data(), dld); | 71 | vk::Span(barriers.data(), cursor)); |
| 61 | }); | 72 | }); |
| 62 | } | 73 | } |
| 63 | 74 | ||
| 64 | bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | 75 | bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, |
| 65 | vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept { | 76 | VkAccessFlags new_access, VkImageLayout new_layout) noexcept { |
| 66 | const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && | 77 | const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && |
| 67 | base_level == 0 && num_levels == image_num_levels; | 78 | base_level == 0 && num_levels == image_num_levels; |
| 68 | if (!is_full_range) { | 79 | if (!is_full_range) { |
| @@ -91,11 +102,21 @@ bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num | |||
| 91 | 102 | ||
| 92 | void VKImage::CreatePresentView() { | 103 | void VKImage::CreatePresentView() { |
| 93 | // Image type has to be 2D to be presented. | 104 | // Image type has to be 2D to be presented. |
| 94 | const vk::ImageViewCreateInfo image_view_ci({}, *image, vk::ImageViewType::e2D, format, {}, | 105 | VkImageViewCreateInfo image_view_ci; |
| 95 | {aspect_mask, 0, 1, 0, 1}); | 106 | image_view_ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; |
| 96 | const auto dev = device.GetLogical(); | 107 | image_view_ci.pNext = nullptr; |
| 97 | const auto& dld = device.GetDispatchLoader(); | 108 | image_view_ci.flags = 0; |
| 98 | present_view = dev.createImageViewUnique(image_view_ci, nullptr, dld); | 109 | image_view_ci.image = *image; |
| 110 | image_view_ci.viewType = VK_IMAGE_VIEW_TYPE_2D; | ||
| 111 | image_view_ci.format = format; | ||
| 112 | image_view_ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 113 | VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; | ||
| 114 | image_view_ci.subresourceRange.aspectMask = aspect_mask; | ||
| 115 | image_view_ci.subresourceRange.baseMipLevel = 0; | ||
| 116 | image_view_ci.subresourceRange.levelCount = 1; | ||
| 117 | image_view_ci.subresourceRange.baseArrayLayer = 0; | ||
| 118 | image_view_ci.subresourceRange.layerCount = 1; | ||
| 119 | present_view = device.GetLogical().CreateImageView(image_view_ci); | ||
| 99 | } | 120 | } |
| 100 | 121 | ||
| 101 | VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { | 122 | VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { |
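The barrier fill-out in Transition follows the standard Vulkan layout-transition recipe. For reference, a self-contained sketch with hypothetical handles, stages, and access masks (not code from the tree):

    #include <vulkan/vulkan.h>

    // Record a transition of one color subresource from 'undefined' to 'transfer dst'.
    void TransitionToTransferDst(VkCommandBuffer cmdbuf, VkImage image) {
        VkImageMemoryBarrier barrier{};
        barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
        barrier.srcAccessMask = 0;                             // previous contents discarded
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
        barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
        barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; // no queue ownership transfer
        barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
        barrier.image = image;
        barrier.subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};

        vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1,
                             &barrier);
    }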
diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h index b78242512..b4d7229e5 100644 --- a/src/video_core/renderer_vulkan/vk_image.h +++ b/src/video_core/renderer_vulkan/vk_image.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_vulkan/declarations.h" | 11 | #include "video_core/renderer_vulkan/wrapper.h" |
| 12 | 12 | ||
| 13 | namespace Vulkan { | 13 | namespace Vulkan { |
| 14 | 14 | ||
| @@ -18,16 +18,16 @@ class VKScheduler; | |||
| 18 | class VKImage { | 18 | class VKImage { |
| 19 | public: | 19 | public: |
| 20 | explicit VKImage(const VKDevice& device, VKScheduler& scheduler, | 20 | explicit VKImage(const VKDevice& device, VKScheduler& scheduler, |
| 21 | const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask); | 21 | const VkImageCreateInfo& image_ci, VkImageAspectFlags aspect_mask); |
| 22 | ~VKImage(); | 22 | ~VKImage(); |
| 23 | 23 | ||
| 24 | /// Records an image transition in the passed command buffer and updates the state of the image. | 24 | /// Records an image transition in the passed command buffer and updates the state of the image. |
| 25 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | 25 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, |
| 26 | vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, | 26 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, |
| 27 | vk::ImageLayout new_layout); | 27 | VkImageLayout new_layout); |
| 28 | 28 | ||
| 29 | /// Returns a view compatible with presentation; the image has to be 2D. | 29 | /// Returns a view compatible with presentation; the image has to be 2D. |
| 30 | vk::ImageView GetPresentView() { | 30 | VkImageView GetPresentView() { |
| 31 | if (!present_view) { | 31 | if (!present_view) { |
| 32 | CreatePresentView(); | 32 | CreatePresentView(); |
| 33 | } | 33 | } |
| @@ -35,28 +35,28 @@ public: | |||
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | /// Returns the Vulkan image handle. | 37 | /// Returns the Vulkan image handle. |
| 38 | vk::Image GetHandle() const { | 38 | const vk::Image& GetHandle() const { |
| 39 | return *image; | 39 | return image; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | /// Returns the Vulkan format for this image. | 42 | /// Returns the Vulkan format for this image. |
| 43 | vk::Format GetFormat() const { | 43 | VkFormat GetFormat() const { |
| 44 | return format; | 44 | return format; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | /// Returns the Vulkan aspect mask. | 47 | /// Returns the Vulkan aspect mask. |
| 48 | vk::ImageAspectFlags GetAspectMask() const { | 48 | VkImageAspectFlags GetAspectMask() const { |
| 49 | return aspect_mask; | 49 | return aspect_mask; |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | private: | 52 | private: |
| 53 | struct SubrangeState final { | 53 | struct SubrangeState final { |
| 54 | vk::AccessFlags access{}; ///< Current access bits. | 54 | VkAccessFlags access = 0; ///< Current access bits. |
| 55 | vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout. | 55 | VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout. |
| 56 | }; | 56 | }; |
| 57 | 57 | ||
| 58 | bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | 58 | bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, |
| 59 | vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept; | 59 | VkAccessFlags new_access, VkImageLayout new_layout) noexcept; |
| 60 | 60 | ||
| 61 | /// Creates a presentation view. | 61 | /// Creates a presentation view. |
| 62 | void CreatePresentView(); | 62 | void CreatePresentView(); |
| @@ -67,16 +67,16 @@ private: | |||
| 67 | const VKDevice& device; ///< Device handle. | 67 | const VKDevice& device; ///< Device handle. |
| 68 | VKScheduler& scheduler; ///< Device scheduler. | 68 | VKScheduler& scheduler; ///< Device scheduler. |
| 69 | 69 | ||
| 70 | const vk::Format format; ///< Vulkan format. | 70 | const VkFormat format; ///< Vulkan format. |
| 71 | const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask. | 71 | const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask. |
| 72 | const u32 image_num_layers; ///< Number of layers. | 72 | const u32 image_num_layers; ///< Number of layers. |
| 73 | const u32 image_num_levels; ///< Number of mipmap levels. | 73 | const u32 image_num_levels; ///< Number of mipmap levels. |
| 74 | 74 | ||
| 75 | UniqueImage image; ///< Image handle. | 75 | vk::Image image; ///< Image handle. |
| 76 | UniqueImageView present_view; ///< Image view compatible with presentation. | 76 | vk::ImageView present_view; ///< Image view compatible with presentation. |
| 77 | 77 | ||
| 78 | std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers. | 78 | std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers. |
| 79 | std::vector<SubrangeState> subrange_states; ///< Current subrange state. | 79 | std::vector<SubrangeState> subrange_states; ///< Current subrange state. |
| 80 | 80 | ||
| 81 | bool state_diverged = false; ///< True when subresources mismatch in layout. | 81 | bool state_diverged = false; ///< True when subresources mismatch in layout. |
| 82 | }; | 82 | }; |
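A hypothetical caller of the interface above, preparing every layer and level of an image for sampling (the stage, access, and layout values are chosen for illustration):

    image.Transition(0, num_layers, 0, num_levels, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
                     VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);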
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.cpp b/src/video_core/renderer_vulkan/vk_memory_manager.cpp index 9cc9979d0..6a9e658bf 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_memory_manager.cpp | |||
| @@ -11,9 +11,9 @@ | |||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 14 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_device.h" | 14 | #include "video_core/renderer_vulkan/vk_device.h" |
| 16 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 15 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| @@ -30,17 +30,11 @@ u64 GetAllocationChunkSize(u64 required_size) { | |||
| 30 | class VKMemoryAllocation final { | 30 | class VKMemoryAllocation final { |
| 31 | public: | 31 | public: |
| 32 | explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, | 32 | explicit VKMemoryAllocation(const VKDevice& device, vk::DeviceMemory memory, |
| 33 | vk::MemoryPropertyFlags properties, u64 allocation_size, u32 type) | 33 | VkMemoryPropertyFlags properties, u64 allocation_size, u32 type) |
| 34 | : device{device}, memory{memory}, properties{properties}, allocation_size{allocation_size}, | 34 | : device{device}, memory{std::move(memory)}, properties{properties}, |
| 35 | shifted_type{ShiftType(type)} {} | 35 | allocation_size{allocation_size}, shifted_type{ShiftType(type)} {} |
| 36 | |||
| 37 | ~VKMemoryAllocation() { | ||
| 38 | const auto dev = device.GetLogical(); | ||
| 39 | const auto& dld = device.GetDispatchLoader(); | ||
| 40 | dev.free(memory, nullptr, dld); | ||
| 41 | } | ||
| 42 | 36 | ||
| 43 | VKMemoryCommit Commit(vk::DeviceSize commit_size, vk::DeviceSize alignment) { | 37 | VKMemoryCommit Commit(VkDeviceSize commit_size, VkDeviceSize alignment) { |
| 44 | auto found = TryFindFreeSection(free_iterator, allocation_size, | 38 | auto found = TryFindFreeSection(free_iterator, allocation_size, |
| 45 | static_cast<u64>(commit_size), static_cast<u64>(alignment)); | 39 | static_cast<u64>(commit_size), static_cast<u64>(alignment)); |
| 46 | if (!found) { | 40 | if (!found) { |
| @@ -73,9 +67,8 @@ public: | |||
| 73 | } | 67 | } |
| 74 | 68 | ||
| 75 | /// Returns whether this allocation is compatible with the arguments. | 69 | /// Returns whether this allocation is compatible with the arguments. |
| 76 | bool IsCompatible(vk::MemoryPropertyFlags wanted_properties, u32 type_mask) const { | 70 | bool IsCompatible(VkMemoryPropertyFlags wanted_properties, u32 type_mask) const { |
| 77 | return (wanted_properties & properties) != vk::MemoryPropertyFlagBits(0) && | 71 | return (wanted_properties & properties) && (type_mask & shifted_type) != 0; |
| 78 | (type_mask & shifted_type) != 0; | ||
| 79 | } | 72 | } |
| 80 | 73 | ||
| 81 | private: | 74 | private: |
| @@ -111,11 +104,11 @@ private: | |||
| 111 | return std::nullopt; | 104 | return std::nullopt; |
| 112 | } | 105 | } |
| 113 | 106 | ||
| 114 | const VKDevice& device; ///< Vulkan device. | 107 | const VKDevice& device; ///< Vulkan device. |
| 115 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handle. | 108 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handle. |
| 116 | const vk::MemoryPropertyFlags properties; ///< Vulkan properties. | 109 | const VkMemoryPropertyFlags properties; ///< Vulkan properties. |
| 117 | const u64 allocation_size; ///< Size of this allocation. | 110 | const u64 allocation_size; ///< Size of this allocation. |
| 118 | const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. | 111 | const u32 shifted_type; ///< Stored Vulkan type of this allocation, shifted. |
| 119 | 112 | ||
| 120 | /// Hints where the next free region is likely going to be. | 113 | /// Hints where the next free region is likely going to be. |
| 121 | u64 free_iterator{}; | 114 | u64 free_iterator{}; |
| @@ -125,22 +118,20 @@ private: | |||
| 125 | }; | 118 | }; |
| 126 | 119 | ||
| 127 | VKMemoryManager::VKMemoryManager(const VKDevice& device) | 120 | VKMemoryManager::VKMemoryManager(const VKDevice& device) |
| 128 | : device{device}, properties{device.GetPhysical().getMemoryProperties( | 121 | : device{device}, properties{device.GetPhysical().GetMemoryProperties()}, |
| 129 | device.GetDispatchLoader())}, | ||
| 130 | is_memory_unified{GetMemoryUnified(properties)} {} | 122 | is_memory_unified{GetMemoryUnified(properties)} {} |
| 131 | 123 | ||
| 132 | VKMemoryManager::~VKMemoryManager() = default; | 124 | VKMemoryManager::~VKMemoryManager() = default; |
| 133 | 125 | ||
| 134 | VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirements, | 126 | VKMemoryCommit VKMemoryManager::Commit(const VkMemoryRequirements& requirements, |
| 135 | bool host_visible) { | 127 | bool host_visible) { |
| 136 | const u64 chunk_size = GetAllocationChunkSize(requirements.size); | 128 | const u64 chunk_size = GetAllocationChunkSize(requirements.size); |
| 137 | 129 | ||
| 138 | // When a host-visible commit is requested, search for host-visible and coherent memory; | 130 | // When a host-visible commit is requested, search for host-visible and coherent memory; |
| 139 | // otherwise search for a fast device-local type. | 131 | // otherwise search for a fast device-local type. |
| 140 | const vk::MemoryPropertyFlags wanted_properties = | 132 | const VkMemoryPropertyFlags wanted_properties = |
| 141 | host_visible | 133 | host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
| 142 | ? vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | 134 | : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; |
| 143 | : vk::MemoryPropertyFlagBits::eDeviceLocal; | ||
| 144 | 135 | ||
| 145 | if (auto commit = TryAllocCommit(requirements, wanted_properties)) { | 136 | if (auto commit = TryAllocCommit(requirements, wanted_properties)) { |
| 146 | return commit; | 137 | return commit; |
| @@ -161,23 +152,19 @@ VKMemoryCommit VKMemoryManager::Commit(const vk::MemoryRequirements& requirement | |||
| 161 | return commit; | 152 | return commit; |
| 162 | } | 153 | } |
| 163 | 154 | ||
| 164 | VKMemoryCommit VKMemoryManager::Commit(vk::Buffer buffer, bool host_visible) { | 155 | VKMemoryCommit VKMemoryManager::Commit(const vk::Buffer& buffer, bool host_visible) { |
| 165 | const auto dev = device.GetLogical(); | 156 | auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), host_visible); |
| 166 | const auto& dld = device.GetDispatchLoader(); | 157 | buffer.BindMemory(commit->GetMemory(), commit->GetOffset()); |
| 167 | auto commit = Commit(dev.getBufferMemoryRequirements(buffer, dld), host_visible); | ||
| 168 | dev.bindBufferMemory(buffer, commit->GetMemory(), commit->GetOffset(), dld); | ||
| 169 | return commit; | 158 | return commit; |
| 170 | } | 159 | } |
| 171 | 160 | ||
| 172 | VKMemoryCommit VKMemoryManager::Commit(vk::Image image, bool host_visible) { | 161 | VKMemoryCommit VKMemoryManager::Commit(const vk::Image& image, bool host_visible) { |
| 173 | const auto dev = device.GetLogical(); | 162 | auto commit = Commit(device.GetLogical().GetImageMemoryRequirements(*image), host_visible); |
| 174 | const auto& dld = device.GetDispatchLoader(); | 163 | image.BindMemory(commit->GetMemory(), commit->GetOffset()); |
| 175 | auto commit = Commit(dev.getImageMemoryRequirements(image, dld), host_visible); | ||
| 176 | dev.bindImageMemory(image, commit->GetMemory(), commit->GetOffset(), dld); | ||
| 177 | return commit; | 164 | return commit; |
| 178 | } | 165 | } |
| 179 | 166 | ||
| 180 | bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, | 167 | bool VKMemoryManager::AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, |
| 181 | u64 size) { | 168 | u64 size) { |
| 182 | const u32 type = [&] { | 169 | const u32 type = [&] { |
| 183 | for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { | 170 | for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { |
| @@ -191,24 +178,26 @@ bool VKMemoryManager::AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 | |||
| 191 | return 0U; | 178 | return 0U; |
| 192 | }(); | 179 | }(); |
| 193 | 180 | ||
| 194 | const auto dev = device.GetLogical(); | ||
| 195 | const auto& dld = device.GetDispatchLoader(); | ||
| 196 | |||
| 197 | // Try to allocate found type. | 181 | // Try to allocate found type. |
| 198 | const vk::MemoryAllocateInfo memory_ai(size, type); | 182 | VkMemoryAllocateInfo memory_ai; |
| 199 | vk::DeviceMemory memory; | 183 | memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; |
| 200 | if (const auto res = dev.allocateMemory(&memory_ai, nullptr, &memory, dld); | 184 | memory_ai.pNext = nullptr; |
| 201 | res != vk::Result::eSuccess) { | 185 | memory_ai.allocationSize = size; |
| 202 | LOG_CRITICAL(Render_Vulkan, "Device allocation failed with code {}!", vk::to_string(res)); | 186 | memory_ai.memoryTypeIndex = type; |
| 187 | |||
| 188 | vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory(memory_ai); | ||
| 189 | if (!memory) { | ||
| 190 | LOG_CRITICAL(Render_Vulkan, "Device allocation failed!"); | ||
| 203 | return false; | 191 | return false; |
| 204 | } | 192 | } |
| 205 | allocations.push_back( | 193 | |
| 206 | std::make_unique<VKMemoryAllocation>(device, memory, wanted_properties, size, type)); | 194 | allocations.push_back(std::make_unique<VKMemoryAllocation>(device, std::move(memory), |
| 195 | wanted_properties, size, type)); | ||
| 207 | return true; | 196 | return true; |
| 208 | } | 197 | } |
| 209 | 198 | ||
| 210 | VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& requirements, | 199 | VKMemoryCommit VKMemoryManager::TryAllocCommit(const VkMemoryRequirements& requirements, |
| 211 | vk::MemoryPropertyFlags wanted_properties) { | 200 | VkMemoryPropertyFlags wanted_properties) { |
| 212 | for (auto& allocation : allocations) { | 201 | for (auto& allocation : allocations) { |
| 213 | if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { | 202 | if (!allocation->IsCompatible(wanted_properties, requirements.memoryTypeBits)) { |
| 214 | continue; | 203 | continue; |
| @@ -220,10 +209,9 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& req | |||
| 220 | return {}; | 209 | return {}; |
| 221 | } | 210 | } |
| 222 | 211 | ||
| 223 | /*static*/ bool VKMemoryManager::GetMemoryUnified( | 212 | bool VKMemoryManager::GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties) { |
| 224 | const vk::PhysicalDeviceMemoryProperties& properties) { | ||
| 225 | for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { | 213 | for (u32 heap_index = 0; heap_index < properties.memoryHeapCount; ++heap_index) { |
| 226 | if (!(properties.memoryHeaps[heap_index].flags & vk::MemoryHeapFlagBits::eDeviceLocal)) { | 214 | if (!(properties.memoryHeaps[heap_index].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT)) { |
| 227 | // Memory is considered unified when heaps are device local only. | 215 | // Memory is considered unified when heaps are device local only. |
| 228 | return false; | 216 | return false; |
| 229 | } | 217 | } |
| @@ -232,23 +220,19 @@ VKMemoryCommit VKMemoryManager::TryAllocCommit(const vk::MemoryRequirements& req | |||
| 232 | } | 220 | } |
| 233 | 221 | ||
| 234 | VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, | 222 | VKMemoryCommitImpl::VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, |
| 235 | vk::DeviceMemory memory, u64 begin, u64 end) | 223 | const vk::DeviceMemory& memory, u64 begin, u64 end) |
| 236 | : device{device}, interval{begin, end}, memory{memory}, allocation{allocation} {} | 224 | : device{device}, memory{memory}, interval{begin, end}, allocation{allocation} {} |
| 237 | 225 | ||
| 238 | VKMemoryCommitImpl::~VKMemoryCommitImpl() { | 226 | VKMemoryCommitImpl::~VKMemoryCommitImpl() { |
| 239 | allocation->Free(this); | 227 | allocation->Free(this); |
| 240 | } | 228 | } |
| 241 | 229 | ||
| 242 | MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { | 230 | MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const { |
| 243 | const auto dev = device.GetLogical(); | 231 | return MemoryMap{this, memory.Map(interval.first + offset_, size)}; |
| 244 | const auto address = reinterpret_cast<u8*>( | ||
| 245 | dev.mapMemory(memory, interval.first + offset_, size, {}, device.GetDispatchLoader())); | ||
| 246 | return MemoryMap{this, address}; | ||
| 247 | } | 232 | } |
| 248 | 233 | ||
| 249 | void VKMemoryCommitImpl::Unmap() const { | 234 | void VKMemoryCommitImpl::Unmap() const { |
| 250 | const auto dev = device.GetLogical(); | 235 | memory.Unmap(); |
| 251 | dev.unmapMemory(memory, device.GetDispatchLoader()); | ||
| 252 | } | 236 | } |
| 253 | 237 | ||
| 254 | MemoryMap VKMemoryCommitImpl::Map() const { | 238 | MemoryMap VKMemoryCommitImpl::Map() const { |
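The type index picked inside AllocMemory follows the usual Vulkan recipe: intersect the resource's memoryTypeBits with the properties being searched for. A standalone sketch of that search, using the spec-canonical "all wanted bits present" test (note that IsCompatible above deliberately accepts any overlapping property bit when reusing an existing allocation):

    #include <optional>
    #include <vulkan/vulkan.h>

    // First memory type permitted by the resource (type_mask) that carries every
    // wanted property flag; std::nullopt when none qualifies.
    std::optional<uint32_t> FindMemoryType(const VkPhysicalDeviceMemoryProperties& props,
                                           uint32_t type_mask, VkMemoryPropertyFlags wanted) {
        for (uint32_t i = 0; i < props.memoryTypeCount; ++i) {
            const bool allowed = (type_mask & (1U << i)) != 0;
            const bool has_all = (props.memoryTypes[i].propertyFlags & wanted) == wanted;
            if (allowed && has_all) {
                return i;
            }
        }
        return std::nullopt;
    }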
diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index cd00bb91b..35ee54d30 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_vulkan/declarations.h" | 11 | #include "video_core/renderer_vulkan/wrapper.h" |
| 12 | 12 | ||
| 13 | namespace Vulkan { | 13 | namespace Vulkan { |
| 14 | 14 | ||
| @@ -32,13 +32,13 @@ public: | |||
| 32 | * memory. When passing false, it will try to allocate device local memory. | 32 | * memory. When passing false, it will try to allocate device local memory. |
| 33 | * @returns A memory commit. | 33 | * @returns A memory commit. |
| 34 | */ | 34 | */ |
| 35 | VKMemoryCommit Commit(const vk::MemoryRequirements& reqs, bool host_visible); | 35 | VKMemoryCommit Commit(const VkMemoryRequirements& reqs, bool host_visible); |
| 36 | 36 | ||
| 37 | /// Commits memory required by the buffer and binds it. | 37 | /// Commits memory required by the buffer and binds it. |
| 38 | VKMemoryCommit Commit(vk::Buffer buffer, bool host_visible); | 38 | VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible); |
| 39 | 39 | ||
| 40 | /// Commits memory required by the image and binds it. | 40 | /// Commits memory required by the image and binds it. |
| 41 | VKMemoryCommit Commit(vk::Image image, bool host_visible); | 41 | VKMemoryCommit Commit(const vk::Image& image, bool host_visible); |
| 42 | 42 | ||
| 43 | /// Returns true if memory allocations are always done in host-visible and coherent memory. | 43 | /// Returns true if memory allocations are always done in host-visible and coherent memory. |
| 44 | bool IsMemoryUnified() const { | 44 | bool IsMemoryUnified() const { |
| @@ -47,18 +47,18 @@ public: | |||
| 47 | 47 | ||
| 48 | private: | 48 | private: |
| 49 | /// Allocates a chunk of memory. | 49 | /// Allocates a chunk of memory. |
| 50 | bool AllocMemory(vk::MemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); | 50 | bool AllocMemory(VkMemoryPropertyFlags wanted_properties, u32 type_mask, u64 size); |
| 51 | 51 | ||
| 52 | /// Tries to allocate a memory commit. | 52 | /// Tries to allocate a memory commit. |
| 53 | VKMemoryCommit TryAllocCommit(const vk::MemoryRequirements& requirements, | 53 | VKMemoryCommit TryAllocCommit(const VkMemoryRequirements& requirements, |
| 54 | vk::MemoryPropertyFlags wanted_properties); | 54 | VkMemoryPropertyFlags wanted_properties); |
| 55 | 55 | ||
| 56 | /// Returns true if the device uses a unified memory model. | 56 | /// Returns true if the device uses a unified memory model. |
| 57 | static bool GetMemoryUnified(const vk::PhysicalDeviceMemoryProperties& properties); | 57 | static bool GetMemoryUnified(const VkPhysicalDeviceMemoryProperties& properties); |
| 58 | 58 | ||
| 59 | const VKDevice& device; ///< Device handle. | 59 | const VKDevice& device; ///< Device handle. |
| 60 | const vk::PhysicalDeviceMemoryProperties properties; ///< Physical device properties. | 60 | const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. |
| 61 | const bool is_memory_unified; ///< True if memory model is unified. | 61 | const bool is_memory_unified; ///< True if memory model is unified. |
| 62 | std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. | 62 | std::vector<std::unique_ptr<VKMemoryAllocation>> allocations; ///< Current allocations. |
| 63 | }; | 63 | }; |
| 64 | 64 | ||
| @@ -68,7 +68,7 @@ class VKMemoryCommitImpl final { | |||
| 68 | 68 | ||
| 69 | public: | 69 | public: |
| 70 | explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, | 70 | explicit VKMemoryCommitImpl(const VKDevice& device, VKMemoryAllocation* allocation, |
| 71 | vk::DeviceMemory memory, u64 begin, u64 end); | 71 | const vk::DeviceMemory& memory, u64 begin, u64 end); |
| 72 | ~VKMemoryCommitImpl(); | 72 | ~VKMemoryCommitImpl(); |
| 73 | 73 | ||
| 74 | /// Maps a memory region and returns a pointer to it. | 74 | /// Maps a memory region and returns a pointer to it. |
| @@ -80,13 +80,13 @@ public: | |||
| 80 | MemoryMap Map() const; | 80 | MemoryMap Map() const; |
| 81 | 81 | ||
| 82 | /// Returns the Vulkan memory handle. | 82 | /// Returns the Vulkan memory handle. |
| 83 | vk::DeviceMemory GetMemory() const { | 83 | VkDeviceMemory GetMemory() const { |
| 84 | return memory; | 84 | return *memory; |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | /// Returns the start position of the commit relative to the allocation. | 87 | /// Returns the start position of the commit relative to the allocation. |
| 88 | vk::DeviceSize GetOffset() const { | 88 | VkDeviceSize GetOffset() const { |
| 89 | return static_cast<vk::DeviceSize>(interval.first); | 89 | return static_cast<VkDeviceSize>(interval.first); |
| 90 | } | 90 | } |
| 91 | 91 | ||
| 92 | private: | 92 | private: |
| @@ -94,8 +94,8 @@ private: | |||
| 94 | void Unmap() const; | 94 | void Unmap() const; |
| 95 | 95 | ||
| 96 | const VKDevice& device; ///< Vulkan device. | 96 | const VKDevice& device; ///< Vulkan device. |
| 97 | const vk::DeviceMemory& memory; ///< Vulkan device memory handle. | ||
| 97 | std::pair<u64, u64> interval{}; ///< Interval where the commit exists. | 98 | std::pair<u64, u64> interval{}; ///< Interval where the commit exists. |
| 98 | vk::DeviceMemory memory; ///< Vulkan device memory handle. | ||
| 99 | VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. | 99 | VKMemoryAllocation* allocation{}; ///< Pointer to the large memory allocation. |
| 100 | }; | 100 | }; |
| 101 | 101 | ||
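Putting the interface together, a hypothetical staging upload would look roughly like this (the MemoryMap accessor name is an assumption; the type is defined outside this excerpt):

    // Commit host-visible memory for a staging buffer, then fill it through a map.
    VKMemoryCommit commit = memory_manager.Commit(staging_buffer, /*host_visible=*/true);
    MemoryMap map = commit->Map(data_size, 0);
    std::memcpy(map.GetAddress(), data, data_size); // GetAddress() is assumed here
    // The mapping is released when 'map' leaves scope; the commit returns its region
    // to the parent allocation when destroyed.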
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 557b9d662..90e3a8edd 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -13,7 +13,6 @@ | |||
| 13 | #include "video_core/engines/kepler_compute.h" | 13 | #include "video_core/engines/kepler_compute.h" |
| 14 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/engines/maxwell_3d.h" |
| 15 | #include "video_core/memory_manager.h" | 15 | #include "video_core/memory_manager.h" |
| 16 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 17 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 16 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 18 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 17 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 19 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 18 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| @@ -26,6 +25,7 @@ | |||
| 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 25 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 27 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 28 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 27 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 28 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 29 | #include "video_core/shader/compiler_settings.h" | 29 | #include "video_core/shader/compiler_settings.h" |
| 30 | 30 | ||
| 31 | namespace Vulkan { | 31 | namespace Vulkan { |
| @@ -36,12 +36,11 @@ using Tegra::Engines::ShaderType; | |||
| 36 | 36 | ||
| 37 | namespace { | 37 | namespace { |
| 38 | 38 | ||
| 39 | // C++20's using enum | 39 | constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; |
| 40 | constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer; | 40 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; |
| 41 | constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer; | 41 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; |
| 42 | constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer; | 42 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; |
| 43 | constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler; | 43 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; |
| 44 | constexpr auto eStorageImage = vk::DescriptorType::eStorageImage; | ||
| 45 | 44 | ||
| 46 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | 45 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ |
| 47 | VideoCommon::Shader::CompileDepth::FullDecompile}; | 46 | VideoCommon::Shader::CompileDepth::FullDecompile}; |
| @@ -126,43 +125,48 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) { | |||
| 126 | } | 125 | } |
| 127 | } | 126 | } |
| 128 | 127 | ||
| 129 | template <vk::DescriptorType descriptor_type, class Container> | 128 | template <VkDescriptorType descriptor_type, class Container> |
| 130 | void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding, | 129 | void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding, |
| 131 | vk::ShaderStageFlags stage_flags, const Container& container) { | 130 | VkShaderStageFlags stage_flags, const Container& container) { |
| 132 | const u32 num_entries = static_cast<u32>(std::size(container)); | 131 | const u32 num_entries = static_cast<u32>(std::size(container)); |
| 133 | for (std::size_t i = 0; i < num_entries; ++i) { | 132 | for (std::size_t i = 0; i < num_entries; ++i) { |
| 134 | u32 count = 1; | 133 | u32 count = 1; |
| 135 | if constexpr (descriptor_type == eCombinedImageSampler) { | 134 | if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { |
| 136 | // Combined image samplers can be arrayed. | 135 | // Combined image samplers can be arrayed. |
| 137 | count = container[i].Size(); | 136 | count = container[i].Size(); |
| 138 | } | 137 | } |
| 139 | bindings.emplace_back(binding++, descriptor_type, count, stage_flags, nullptr); | 138 | VkDescriptorSetLayoutBinding& entry = bindings.emplace_back(); |
| 139 | entry.binding = binding++; | ||
| 140 | entry.descriptorType = descriptor_type; | ||
| 141 | entry.descriptorCount = count; | ||
| 142 | entry.stageFlags = stage_flags; | ||
| 143 | entry.pImmutableSamplers = nullptr; | ||
| 140 | } | 144 | } |
| 141 | } | 145 | } |
| 142 | 146 | ||
| 143 | u32 FillDescriptorLayout(const ShaderEntries& entries, | 147 | u32 FillDescriptorLayout(const ShaderEntries& entries, |
| 144 | std::vector<vk::DescriptorSetLayoutBinding>& bindings, | 148 | std::vector<VkDescriptorSetLayoutBinding>& bindings, |
| 145 | Maxwell::ShaderProgram program_type, u32 base_binding) { | 149 | Maxwell::ShaderProgram program_type, u32 base_binding) { |
| 146 | const ShaderType stage = GetStageFromProgram(program_type); | 150 | const ShaderType stage = GetStageFromProgram(program_type); |
| 147 | const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); | 151 | const VkShaderStageFlags flags = MaxwellToVK::ShaderStage(stage); |
| 148 | 152 | ||
| 149 | u32 binding = base_binding; | 153 | u32 binding = base_binding; |
| 150 | AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers); | 154 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); |
| 151 | AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers); | 155 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); |
| 152 | AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers); | 156 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); |
| 153 | AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers); | 157 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); |
| 154 | AddBindings<eStorageImage>(bindings, binding, flags, entries.images); | 158 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); |
| 155 | return binding; | 159 | return binding; |
| 156 | } | 160 | } |
| 157 | 161 | ||
| 158 | } // Anonymous namespace | 162 | } // Anonymous namespace |
| 159 | 163 | ||
| 160 | CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, | 164 | CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, |
| 161 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, | 165 | GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, |
| 162 | ProgramCode program_code, u32 main_offset) | 166 | u32 main_offset) |
| 163 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, | 167 | : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, |
| 164 | program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, | 168 | registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, |
| 165 | shader_ir{this->program_code, main_offset, compiler_settings, registry}, | 169 | compiler_settings, registry}, |
| 166 | entries{GenerateShaderEntries(shader_ir)} {} | 170 | entries{GenerateShaderEntries(shader_ir)} {} |
| 167 | 171 | ||
| 168 | CachedShader::~CachedShader() = default; | 172 | CachedShader::~CachedShader() = default; |
| @@ -201,19 +205,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 201 | 205 | ||
| 202 | auto& memory_manager{system.GPU().MemoryManager()}; | 206 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 203 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | 207 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; |
| 204 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | 208 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 205 | auto shader = TryGet(host_ptr); | 209 | ASSERT(cpu_addr); |
| 210 | auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||
| 206 | if (!shader) { | 211 | if (!shader) { |
| 212 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | ||
| 213 | |||
| 207 | // No shader found - create a new one | 214 | // No shader found - create a new one |
| 208 | constexpr u32 stage_offset = 10; | 215 | constexpr u32 stage_offset = 10; |
| 209 | const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); | 216 | const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); |
| 210 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); | 217 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); |
| 211 | 218 | ||
| 212 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||
| 213 | ASSERT(cpu_addr); | ||
| 214 | |||
| 215 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | 219 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, |
| 216 | host_ptr, std::move(code), stage_offset); | 220 | std::move(code), stage_offset); |
| 217 | Register(shader); | 221 | Register(shader); |
| 218 | } | 222 | } |
| 219 | shaders[index] = std::move(shader); | 223 | shaders[index] = std::move(shader); |
| @@ -253,18 +257,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 253 | 257 | ||
| 254 | auto& memory_manager = system.GPU().MemoryManager(); | 258 | auto& memory_manager = system.GPU().MemoryManager(); |
| 255 | const auto program_addr = key.shader; | 259 | const auto program_addr = key.shader; |
| 256 | const auto host_ptr = memory_manager.GetPointer(program_addr); | ||
| 257 | 260 | ||
| 258 | auto shader = TryGet(host_ptr); | 261 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 262 | ASSERT(cpu_addr); | ||
| 263 | |||
| 264 | auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||
| 259 | if (!shader) { | 265 | if (!shader) { |
| 260 | // No shader found - create a new one | 266 | // No shader found - create a new one |
| 261 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 267 | const auto host_ptr = memory_manager.GetPointer(program_addr); |
| 262 | ASSERT(cpu_addr); | ||
| 263 | 268 | ||
| 264 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); | 269 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); |
| 265 | constexpr u32 kernel_main_offset = 0; | 270 | constexpr u32 kernel_main_offset = 0; |
| 266 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | 271 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, |
| 267 | program_addr, *cpu_addr, host_ptr, std::move(code), | 272 | program_addr, *cpu_addr, std::move(code), |
| 268 | kernel_main_offset); | 273 | kernel_main_offset); |
| 269 | Register(shader); | 274 | Register(shader); |
| 270 | } | 275 | } |
| @@ -317,7 +322,7 @@ void VKPipelineCache::Unregister(const Shader& shader) { | |||
| 317 | RasterizerCache::Unregister(shader); | 322 | RasterizerCache::Unregister(shader); |
| 318 | } | 323 | } |
| 319 | 324 | ||
| 320 | std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> | 325 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> |
| 321 | VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | 326 | VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { |
| 322 | const auto& fixed_state = key.fixed_state; | 327 | const auto& fixed_state = key.fixed_state; |
| 323 | auto& memory_manager = system.GPU().MemoryManager(); | 328 | auto& memory_manager = system.GPU().MemoryManager(); |
| @@ -334,7 +339,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 334 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 339 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 335 | 340 | ||
| 336 | SPIRVProgram program; | 341 | SPIRVProgram program; |
| 337 | std::vector<vk::DescriptorSetLayoutBinding> bindings; | 342 | std::vector<VkDescriptorSetLayoutBinding> bindings; |
| 338 | 343 | ||
| 339 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 344 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 340 | const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); | 345 | const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); |
| @@ -345,8 +350,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 345 | } | 350 | } |
| 346 | 351 | ||
| 347 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); | 352 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); |
| 348 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 353 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
| 349 | const auto shader = TryGet(host_ptr); | 354 | ASSERT(cpu_addr); |
| 355 | const auto shader = TryGet(*cpu_addr); | ||
| 350 | ASSERT(shader); | 356 | ASSERT(shader); |
| 351 | 357 | ||
| 352 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 358 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
| @@ -369,32 +375,49 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 369 | return {std::move(program), std::move(bindings)}; | 375 | return {std::move(program), std::move(bindings)}; |
| 370 | } | 376 | } |
| 371 | 377 | ||
| 372 | template <vk::DescriptorType descriptor_type, class Container> | 378 | template <VkDescriptorType descriptor_type, class Container> |
| 373 | void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding, | 379 | void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding, |
| 374 | u32& offset, const Container& container) { | 380 | u32& offset, const Container& container) { |
| 375 | static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); | 381 | static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); |
| 376 | const u32 count = static_cast<u32>(std::size(container)); | 382 | const u32 count = static_cast<u32>(std::size(container)); |
| 377 | 383 | ||
| 378 | if constexpr (descriptor_type == eCombinedImageSampler) { | 384 | if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) { |
| 379 | for (u32 i = 0; i < count; ++i) { | 385 | for (u32 i = 0; i < count; ++i) { |
| 380 | const u32 num_samplers = container[i].Size(); | 386 | const u32 num_samplers = container[i].Size(); |
| 381 | template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset, | 387 | VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); |
| 382 | entry_size); | 388 | entry.dstBinding = binding; |
| 389 | entry.dstArrayElement = 0; | ||
| 390 | entry.descriptorCount = num_samplers; | ||
| 391 | entry.descriptorType = descriptor_type; | ||
| 392 | entry.offset = offset; | ||
| 393 | entry.stride = entry_size; | ||
| 394 | |||
| 383 | ++binding; | 395 | ++binding; |
| 384 | offset += num_samplers * entry_size; | 396 | offset += num_samplers * entry_size; |
| 385 | } | 397 | } |
| 386 | return; | 398 | return; |
| 387 | } | 399 | } |
| 388 | 400 | ||
| 389 | if constexpr (descriptor_type == eUniformTexelBuffer) { | 401 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { |
| 390 | // Nvidia has a bug where updating multiple uniform texel buffer descriptors at once | 402 | // Nvidia has a bug where updating multiple uniform texel buffer descriptors at once |
| 391 | // causes the driver to crash. | 403 | // causes the driver to crash. |
| 392 | for (u32 i = 0; i < count; ++i) { | 404 | for (u32 i = 0; i < count; ++i) { |
| 393 | template_entries.emplace_back(binding + i, 0, 1, descriptor_type, | 405 | VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); |
| 394 | offset + i * entry_size, entry_size); | 406 | entry.dstBinding = binding + i; |
| 407 | entry.dstArrayElement = 0; | ||
| 408 | entry.descriptorCount = 1; | ||
| 409 | entry.descriptorType = descriptor_type; | ||
| 410 | entry.offset = offset + i * entry_size; | ||
| 411 | entry.stride = entry_size; | ||
| 395 | } | 412 | } |
| 396 | } else if (count > 0) { | 413 | } else if (count > 0) { |
| 397 | template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); | 414 | VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); |
| 415 | entry.dstBinding = binding; | ||
| 416 | entry.dstArrayElement = 0; | ||
| 417 | entry.descriptorCount = count; | ||
| 418 | entry.descriptorType = descriptor_type; | ||
| 419 | entry.offset = offset; | ||
| 420 | entry.stride = entry_size; | ||
| 398 | } | 421 | } |
| 399 | offset += count * entry_size; | 422 | offset += count * entry_size; |
| 400 | binding += count; | 423 | binding += count; |
| @@ -402,12 +425,12 @@ void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, | |||
| 402 | 425 | ||
| 403 | void FillDescriptorUpdateTemplateEntries( | 426 | void FillDescriptorUpdateTemplateEntries( |
| 404 | const ShaderEntries& entries, u32& binding, u32& offset, | 427 | const ShaderEntries& entries, u32& binding, u32& offset, |
| 405 | std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { | 428 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { |
| 406 | AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers); | 429 | AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); |
| 407 | AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers); | 430 | AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); |
| 408 | AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers); | 431 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); |
| 409 | AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers); | 432 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); |
| 410 | AddEntry<eStorageImage>(template_entries, offset, binding, entries.images); | 433 | AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); |
| 411 | } | 434 | } |
| 412 | 435 | ||
| 413 | } // namespace Vulkan | 436 | } // namespace Vulkan |
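For context, the entries built by FillDescriptorUpdateTemplateEntries feed VK_KHR_descriptor_update_template. A minimal sketch of how such an entry list is typically consumed; set_layout, set, and payload are hypothetical stand-ins for state this hunk does not show:

    VkDescriptorUpdateTemplateCreateInfoKHR ci{};
    ci.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR;
    ci.descriptorUpdateEntryCount = static_cast<u32>(template_entries.size());
    ci.pDescriptorUpdateEntries = template_entries.data();
    ci.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
    ci.descriptorSetLayout = set_layout; // hypothetical layout handle
    VkDescriptorUpdateTemplateKHR update_template;
    vkCreateDescriptorUpdateTemplateKHR(device, &ci, nullptr, &update_template);

    // One call then writes every binding from the packed CPU-side payload:
    vkUpdateDescriptorSetWithTemplateKHR(device, set, update_template, payload);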
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c4c112290..7ccdb7083 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -19,12 +19,12 @@ | |||
| 19 | #include "video_core/engines/const_buffer_engine_interface.h" | 19 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/rasterizer_cache.h" | 21 | #include "video_core/rasterizer_cache.h" |
| 22 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 24 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 23 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 25 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 25 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 27 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" | 26 | #include "video_core/renderer_vulkan/vk_shader_decompiler.h" |
| 27 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 28 | #include "video_core/shader/registry.h" | 28 | #include "video_core/shader/registry.h" |
| 29 | #include "video_core/shader/shader_ir.h" | 29 | #include "video_core/shader/shader_ir.h" |
| 30 | #include "video_core/surface.h" | 30 | #include "video_core/surface.h" |
| @@ -113,17 +113,13 @@ namespace Vulkan { | |||
| 113 | class CachedShader final : public RasterizerCacheObject { | 113 | class CachedShader final : public RasterizerCacheObject { |
| 114 | public: | 114 | public: |
| 115 | explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, | 115 | explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, |
| 116 | VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); | 116 | VAddr cpu_addr, ProgramCode program_code, u32 main_offset); |
| 117 | ~CachedShader(); | 117 | ~CachedShader(); |
| 118 | 118 | ||
| 119 | GPUVAddr GetGpuAddr() const { | 119 | GPUVAddr GetGpuAddr() const { |
| 120 | return gpu_addr; | 120 | return gpu_addr; |
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | VAddr GetCpuAddr() const override { | ||
| 124 | return cpu_addr; | ||
| 125 | } | ||
| 126 | |||
| 127 | std::size_t GetSizeInBytes() const override { | 123 | std::size_t GetSizeInBytes() const override { |
| 128 | return program_code.size() * sizeof(u64); | 124 | return program_code.size() * sizeof(u64); |
| 129 | } | 125 | } |
| @@ -149,7 +145,6 @@ private: | |||
| 149 | Tegra::Engines::ShaderType stage); | 145 | Tegra::Engines::ShaderType stage); |
| 150 | 146 | ||
| 151 | GPUVAddr gpu_addr{}; | 147 | GPUVAddr gpu_addr{}; |
| 152 | VAddr cpu_addr{}; | ||
| 153 | ProgramCode program_code; | 148 | ProgramCode program_code; |
| 154 | VideoCommon::Shader::Registry registry; | 149 | VideoCommon::Shader::Registry registry; |
| 155 | VideoCommon::Shader::ShaderIR shader_ir; | 150 | VideoCommon::Shader::ShaderIR shader_ir; |
| @@ -177,7 +172,7 @@ protected: | |||
| 177 | void FlushObjectInner(const Shader& object) override {} | 172 | void FlushObjectInner(const Shader& object) override {} |
| 178 | 173 | ||
| 179 | private: | 174 | private: |
| 180 | std::pair<SPIRVProgram, std::vector<vk::DescriptorSetLayoutBinding>> DecompileShaders( | 175 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( |
| 181 | const GraphicsPipelineCacheKey& key); | 176 | const GraphicsPipelineCacheKey& key); |
| 182 | 177 | ||
| 183 | Core::System& system; | 178 | Core::System& system; |
| @@ -199,6 +194,6 @@ private: | |||
| 199 | 194 | ||
| 200 | void FillDescriptorUpdateTemplateEntries( | 195 | void FillDescriptorUpdateTemplateEntries( |
| 201 | const ShaderEntries& entries, u32& binding, u32& offset, | 196 | const ShaderEntries& entries, u32& binding, u32& offset, |
| 202 | std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); | 197 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries); |
| 203 | 198 | ||
| 204 | } // namespace Vulkan | 199 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ffbf60dda..0966c7ff7 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp | |||
| @@ -8,19 +8,19 @@ | |||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | 11 | #include "video_core/renderer_vulkan/vk_device.h" |
| 13 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 12 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 14 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 13 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 15 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 16 | 16 | ||
| 17 | namespace Vulkan { | 17 | namespace Vulkan { |
| 18 | 18 | ||
| 19 | namespace { | 19 | namespace { |
| 20 | 20 | ||
| 21 | constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; | 21 | constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION}; |
| 22 | 22 | ||
| 23 | constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { | 23 | constexpr VkQueryType GetTarget(VideoCore::QueryType type) { |
| 24 | return QUERY_TARGETS[static_cast<std::size_t>(type)]; | 24 | return QUERY_TARGETS[static_cast<std::size_t>(type)]; |
| 25 | } | 25 | } |
| 26 | 26 | ||
| @@ -35,29 +35,34 @@ void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) | |||
| 35 | type = type_; | 35 | type = type_; |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { | 38 | std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { |
| 39 | std::size_t index; | 39 | std::size_t index; |
| 40 | do { | 40 | do { |
| 41 | index = CommitResource(fence); | 41 | index = CommitResource(fence); |
| 42 | } while (usage[index]); | 42 | } while (usage[index]); |
| 43 | usage[index] = true; | 43 | usage[index] = true; |
| 44 | 44 | ||
| 45 | return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; | 45 | return {*pools[index / GROW_STEP], static_cast<u32>(index % GROW_STEP)}; |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | void QueryPool::Allocate(std::size_t begin, std::size_t end) { | 48 | void QueryPool::Allocate(std::size_t begin, std::size_t end) { |
| 49 | usage.resize(end); | 49 | usage.resize(end); |
| 50 | 50 | ||
| 51 | const auto dev = device->GetLogical(); | 51 | VkQueryPoolCreateInfo query_pool_ci; |
| 52 | const u32 size = static_cast<u32>(end - begin); | 52 | query_pool_ci.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; |
| 53 | const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); | 53 | query_pool_ci.pNext = nullptr; |
| 54 | pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); | 54 | query_pool_ci.flags = 0; |
| 55 | query_pool_ci.queryType = GetTarget(type); | ||
| 56 | query_pool_ci.queryCount = static_cast<u32>(end - begin); | ||
| 57 | query_pool_ci.pipelineStatistics = 0; | ||
| 58 | pools.push_back(device->GetLogical().CreateQueryPool(query_pool_ci)); | ||
| 55 | } | 59 | } |
| 56 | 60 | ||
| 57 | void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { | 61 | void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { |
| 58 | const auto it = | 62 | const auto it = |
| 59 | std::find_if(std::begin(pools), std::end(pools), | 63 | std::find_if(pools.begin(), pools.end(), [query_pool = query.first](vk::QueryPool& pool) { |
| 60 | [query_pool = query.first](auto& pool) { return query_pool == *pool; }); | 64 | return query_pool == *pool; |
| 65 | }); | ||
| 61 | ASSERT(it != std::end(pools)); | 66 | ASSERT(it != std::end(pools)); |
| 62 | 67 | ||
| 63 | const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); | 68 | const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); |
| @@ -76,12 +81,11 @@ VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& | |||
| 76 | 81 | ||
| 77 | VKQueryCache::~VKQueryCache() = default; | 82 | VKQueryCache::~VKQueryCache() = default; |
| 78 | 83 | ||
| 79 | std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { | 84 | std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { |
| 80 | return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); | 85 | return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); |
| 81 | } | 86 | } |
| 82 | 87 | ||
| 83 | void VKQueryCache::Reserve(VideoCore::QueryType type, | 88 | void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) { |
| 84 | std::pair<vk::QueryPool, std::uint32_t> query) { | ||
| 85 | query_pools[static_cast<std::size_t>(type)].Reserve(query); | 89 | query_pools[static_cast<std::size_t>(type)].Reserve(query); |
| 86 | } | 90 | } |
| 87 | 91 | ||
| @@ -89,10 +93,10 @@ HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> depen | |||
| 89 | VideoCore::QueryType type) | 93 | VideoCore::QueryType type) |
| 90 | : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, | 94 | : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, |
| 91 | type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { | 95 | type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { |
| 92 | const auto dev = cache.Device().GetLogical(); | 96 | const vk::Device* logical = &cache.Device().GetLogical(); |
| 93 | cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { | 97 | cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { |
| 94 | dev.resetQueryPoolEXT(query.first, query.second, 1, dld); | 98 | logical->ResetQueryPoolEXT(query.first, query.second, 1); |
| 95 | cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); | 99 | cmdbuf.BeginQuery(query.first, query.second, VK_QUERY_CONTROL_PRECISE_BIT); |
| 96 | }); | 100 | }); |
| 97 | } | 101 | } |
| 98 | 102 | ||
| @@ -101,22 +105,16 @@ HostCounter::~HostCounter() { | |||
| 101 | } | 105 | } |
| 102 | 106 | ||
| 103 | void HostCounter::EndQuery() { | 107 | void HostCounter::EndQuery() { |
| 104 | cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { | 108 | cache.Scheduler().Record( |
| 105 | cmdbuf.endQuery(query.first, query.second, dld); | 109 | [query = query](vk::CommandBuffer cmdbuf) { cmdbuf.EndQuery(query.first, query.second); }); |
| 106 | }); | ||
| 107 | } | 110 | } |
| 108 | 111 | ||
| 109 | u64 HostCounter::BlockingQuery() const { | 112 | u64 HostCounter::BlockingQuery() const { |
| 110 | if (ticks >= cache.Scheduler().Ticks()) { | 113 | if (ticks >= cache.Scheduler().Ticks()) { |
| 111 | cache.Scheduler().Flush(); | 114 | cache.Scheduler().Flush(); |
| 112 | } | 115 | } |
| 113 | 116 | return cache.Device().GetLogical().GetQueryResult<u64>( | |
| 114 | const auto dev = cache.Device().GetLogical(); | 117 | query.first, query.second, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); |
| 115 | const auto& dld = cache.Device().GetDispatchLoader(); | ||
| 116 | u64 value; | ||
| 117 | dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), | ||
| 118 | vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); | ||
| 119 | return value; | ||
| 120 | } | 118 | } |
| 121 | 119 | ||
| 122 | } // namespace Vulkan | 120 | } // namespace Vulkan |
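The calls above are the standard occlusion-query round trip, split across the scheduler. A minimal raw sketch, assuming VK_EXT_host_query_reset (which ResetQueryPoolEXT relies on) and a pool/slot pair as returned by Commit():

    vkResetQueryPoolEXT(device, pool, slot, 1); // host-side reset, no cmdbuf
    vkCmdBeginQuery(cmdbuf, pool, slot, VK_QUERY_CONTROL_PRECISE_BIT);
    // ... draws whose passing samples are counted ...
    vkCmdEndQuery(cmdbuf, pool, slot);

    // After the command buffer is flushed, block until the result lands:
    u64 value;
    vkGetQueryPoolResults(device, pool, slot, 1, sizeof(value), &value,
                          sizeof(value),
                          VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);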
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index c3092ee96..b63784f4b 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h | |||
| @@ -12,8 +12,8 @@ | |||
| 12 | 12 | ||
| 13 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 14 | #include "video_core/query_cache.h" | 14 | #include "video_core/query_cache.h" |
| 15 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 15 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 17 | 17 | ||
| 18 | namespace VideoCore { | 18 | namespace VideoCore { |
| 19 | class RasterizerInterface; | 19 | class RasterizerInterface; |
| @@ -36,9 +36,9 @@ public: | |||
| 36 | 36 | ||
| 37 | void Initialize(const VKDevice& device, VideoCore::QueryType type); | 37 | void Initialize(const VKDevice& device, VideoCore::QueryType type); |
| 38 | 38 | ||
| 39 | std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); | 39 | std::pair<VkQueryPool, u32> Commit(VKFence& fence); |
| 40 | 40 | ||
| 41 | void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); | 41 | void Reserve(std::pair<VkQueryPool, u32> query); |
| 42 | 42 | ||
| 43 | protected: | 43 | protected: |
| 44 | void Allocate(std::size_t begin, std::size_t end) override; | 44 | void Allocate(std::size_t begin, std::size_t end) override; |
| @@ -49,7 +49,7 @@ private: | |||
| 49 | const VKDevice* device = nullptr; | 49 | const VKDevice* device = nullptr; |
| 50 | VideoCore::QueryType type = {}; | 50 | VideoCore::QueryType type = {}; |
| 51 | 51 | ||
| 52 | std::vector<UniqueQueryPool> pools; | 52 | std::vector<vk::QueryPool> pools; |
| 53 | std::vector<bool> usage; | 53 | std::vector<bool> usage; |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| @@ -61,9 +61,9 @@ public: | |||
| 61 | const VKDevice& device, VKScheduler& scheduler); | 61 | const VKDevice& device, VKScheduler& scheduler); |
| 62 | ~VKQueryCache(); | 62 | ~VKQueryCache(); |
| 63 | 63 | ||
| 64 | std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); | 64 | std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type); |
| 65 | 65 | ||
| 66 | void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); | 66 | void Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query); |
| 67 | 67 | ||
| 68 | const VKDevice& Device() const noexcept { | 68 | const VKDevice& Device() const noexcept { |
| 69 | return device; | 69 | return device; |
| @@ -91,7 +91,7 @@ private: | |||
| 91 | 91 | ||
| 92 | VKQueryCache& cache; | 92 | VKQueryCache& cache; |
| 93 | const VideoCore::QueryType type; | 93 | const VideoCore::QueryType type; |
| 94 | const std::pair<vk::QueryPool, std::uint32_t> query; | 94 | const std::pair<VkQueryPool, u32> query; |
| 95 | const u64 ticks; | 95 | const u64 ticks; |
| 96 | }; | 96 | }; |
| 97 | 97 | ||
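QueryPool's pools/usage pair is a small slab allocator: queries are created GROW_STEP at a time, and the flat index handed out by CommitResource is split into a pool handle plus a slot, as Commit() shows. A sketch of the shared arithmetic, assuming GROW_STEP is the fixed batch size declared alongside this class:

    const std::size_t pool_index = flat_index / GROW_STEP;       // which VkQueryPool
    const u32 slot = static_cast<u32>(flat_index % GROW_STEP);   // slot inside it
    return {*pools[pool_index], slot};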
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 58c69b786..774ba1f26 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 20 | #include "video_core/engines/kepler_compute.h" | 20 | #include "video_core/engines/kepler_compute.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 23 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 24 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| @@ -39,6 +38,7 @@ | |||
| 39 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 38 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 40 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 39 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 41 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 40 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 41 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 42 | 42 | ||
| 43 | namespace Vulkan { | 43 | namespace Vulkan { |
| 44 | 44 | ||
| @@ -60,32 +60,42 @@ namespace { | |||
| 60 | 60 | ||
| 61 | constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); | 61 | constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); |
| 62 | 62 | ||
| 63 | vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { | 63 | VkViewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { |
| 64 | const auto& viewport = regs.viewport_transform[index]; | 64 | const auto& src = regs.viewport_transform[index]; |
| 65 | const float x = viewport.translate_x - viewport.scale_x; | 65 | const float width = src.scale_x * 2.0f; |
| 66 | const float y = viewport.translate_y - viewport.scale_y; | 66 | const float height = src.scale_y * 2.0f; |
| 67 | const float width = viewport.scale_x * 2.0f; | ||
| 68 | const float height = viewport.scale_y * 2.0f; | ||
| 69 | 67 | ||
| 70 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; | 68 | VkViewport viewport; |
| 71 | float near = viewport.translate_z - viewport.scale_z * reduce_z; | 69 | viewport.x = src.translate_x - src.scale_x; |
| 72 | float far = viewport.translate_z + viewport.scale_z; | 70 | viewport.y = src.translate_y - src.scale_y; |
| 71 | viewport.width = width != 0.0f ? width : 1.0f; | ||
| 72 | viewport.height = height != 0.0f ? height : 1.0f; | ||
| 73 | |||
| 74 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; | ||
| 75 | viewport.minDepth = src.translate_z - src.scale_z * reduce_z; | ||
| 76 | viewport.maxDepth = src.translate_z + src.scale_z; | ||
| 73 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { | 77 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { |
| 74 | near = std::clamp(near, 0.0f, 1.0f); | 78 | viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); |
| 75 | far = std::clamp(far, 0.0f, 1.0f); | 79 | viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); |
| 76 | } | 80 | } |
| 77 | 81 | return viewport; | |
| 78 | return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far); | ||
| 79 | } | 82 | } |
| 80 | 83 | ||
| 81 | constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { | 84 | VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { |
| 82 | const auto& scissor = regs.scissor_test[index]; | 85 | const auto& src = regs.scissor_test[index]; |
| 83 | if (!scissor.enable) { | 86 | VkRect2D scissor; |
| 84 | return {{0, 0}, {INT32_MAX, INT32_MAX}}; | 87 | if (src.enable) { |
| 88 | scissor.offset.x = static_cast<s32>(src.min_x); | ||
| 89 | scissor.offset.y = static_cast<s32>(src.min_y); | ||
| 90 | scissor.extent.width = src.max_x - src.min_x; | ||
| 91 | scissor.extent.height = src.max_y - src.min_y; | ||
| 92 | } else { | ||
| 93 | scissor.offset.x = 0; | ||
| 94 | scissor.offset.y = 0; | ||
| 95 | scissor.extent.width = std::numeric_limits<s32>::max(); | ||
| 96 | scissor.extent.height = std::numeric_limits<s32>::max(); | ||
| 85 | } | 97 | } |
| 86 | const u32 width = scissor.max_x - scissor.min_x; | 98 | return scissor; |
| 87 | const u32 height = scissor.max_y - scissor.min_y; | ||
| 88 | return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}}; | ||
| 89 | } | 99 | } |
| 90 | 100 | ||
| 91 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | 101 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( |
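Maxwell stores a viewport as a center (translate) plus half-extent (scale), while VkViewport wants a corner plus size; GetViewportState performs that conversion and clamps depth when VK_EXT_depth_range_unrestricted is missing. A comment-only worked example, assuming a 1280x720 viewport with ZeroToOne depth (reduce_z == 0):

    //   scale_x = 640, translate_x = 640  ->  x = 0, width  = 1280
    //   scale_y = 360, translate_y = 360  ->  y = 0, height = 720
    //   scale_z = 1,   translate_z = 0    ->  minDepth = 0, maxDepth = 1
    // Under MinusOneToOne, reduce_z == 1 and minDepth = translate_z - scale_z,
    // stretching the range to cover GL-style [-1, 1] clip depth.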
| @@ -97,8 +107,8 @@ std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | |||
| 97 | return addresses; | 107 | return addresses; |
| 98 | } | 108 | } |
| 99 | 109 | ||
| 100 | void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage, | 110 | void TransitionImages(const std::vector<ImageView>& views, VkPipelineStageFlags pipeline_stage, |
| 101 | vk::AccessFlags access) { | 111 | VkAccessFlags access) { |
| 102 | for (auto& [view, layout] : views) { | 112 | for (auto& [view, layout] : views) { |
| 103 | view->Transition(*layout, pipeline_stage, access); | 113 | view->Transition(*layout, pipeline_stage, access); |
| 104 | } | 114 | } |
| @@ -127,13 +137,13 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry | |||
| 127 | 137 | ||
| 128 | class BufferBindings final { | 138 | class BufferBindings final { |
| 129 | public: | 139 | public: |
| 130 | void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { | 140 | void AddVertexBinding(const VkBuffer* buffer, VkDeviceSize offset) { |
| 131 | vertex.buffer_ptrs[vertex.num_buffers] = buffer; | 141 | vertex.buffer_ptrs[vertex.num_buffers] = buffer; |
| 132 | vertex.offsets[vertex.num_buffers] = offset; | 142 | vertex.offsets[vertex.num_buffers] = offset; |
| 133 | ++vertex.num_buffers; | 143 | ++vertex.num_buffers; |
| 134 | } | 144 | } |
| 135 | 145 | ||
| 136 | void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { | 146 | void SetIndexBinding(const VkBuffer* buffer, VkDeviceSize offset, VkIndexType type) { |
| 137 | index.buffer = buffer; | 147 | index.buffer = buffer; |
| 138 | index.offset = offset; | 148 | index.offset = offset; |
| 139 | index.type = type; | 149 | index.type = type; |
| @@ -217,14 +227,14 @@ private: | |||
| 217 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | 227 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. |
| 218 | struct { | 228 | struct { |
| 219 | std::size_t num_buffers = 0; | 229 | std::size_t num_buffers = 0; |
| 220 | std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs; | 230 | std::array<const VkBuffer*, Maxwell::NumVertexArrays> buffer_ptrs; |
| 221 | std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets; | 231 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; |
| 222 | } vertex; | 232 | } vertex; |
| 223 | 233 | ||
| 224 | struct { | 234 | struct { |
| 225 | const vk::Buffer* buffer = nullptr; | 235 | const VkBuffer* buffer = nullptr; |
| 226 | vk::DeviceSize offset; | 236 | VkDeviceSize offset; |
| 227 | vk::IndexType type; | 237 | VkIndexType type; |
| 228 | } index; | 238 | } index; |
| 229 | 239 | ||
| 230 | template <std::size_t N> | 240 | template <std::size_t N> |
| @@ -243,38 +253,35 @@ private: | |||
| 243 | return; | 253 | return; |
| 244 | } | 254 | } |
| 245 | 255 | ||
| 246 | std::array<vk::Buffer, N> buffers; | 256 | std::array<VkBuffer, N> buffers; |
| 247 | std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), | 257 | std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), |
| 248 | [](const auto ptr) { return *ptr; }); | 258 | [](const auto ptr) { return *ptr; }); |
| 249 | 259 | ||
| 250 | std::array<vk::DeviceSize, N> offsets; | 260 | std::array<VkDeviceSize, N> offsets; |
| 251 | std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); | 261 | std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); |
| 252 | 262 | ||
| 253 | if constexpr (is_indexed) { | 263 | if constexpr (is_indexed) { |
| 254 | // Indexed draw | 264 | // Indexed draw |
| 255 | scheduler.Record([buffers, offsets, index_buffer = *index.buffer, | 265 | scheduler.Record([buffers, offsets, index_buffer = *index.buffer, |
| 256 | index_offset = index.offset, | 266 | index_offset = index.offset, |
| 257 | index_type = index.type](auto cmdbuf, auto& dld) { | 267 | index_type = index.type](vk::CommandBuffer cmdbuf) { |
| 258 | cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); | 268 | cmdbuf.BindIndexBuffer(index_buffer, index_offset, index_type); |
| 259 | cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), | 269 | cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); |
| 260 | dld); | ||
| 261 | }); | 270 | }); |
| 262 | } else { | 271 | } else { |
| 263 | // Array draw | 272 | // Array draw |
| 264 | scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { | 273 | scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) { |
| 265 | cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), | 274 | cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); |
| 266 | dld); | ||
| 267 | }); | 275 | }); |
| 268 | } | 276 | } |
| 269 | } | 277 | } |
| 270 | }; | 278 | }; |
| 271 | 279 | ||
| 272 | void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, | 280 | void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { |
| 273 | const vk::DispatchLoaderDynamic& dld) const { | ||
| 274 | if (is_indexed) { | 281 | if (is_indexed) { |
| 275 | cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); | 282 | cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance); |
| 276 | } else { | 283 | } else { |
| 277 | cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld); | 284 | cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance); |
| 278 | } | 285 | } |
| 279 | } | 286 | } |
| 280 | 287 | ||
| @@ -337,7 +344,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 337 | 344 | ||
| 338 | const auto renderpass = pipeline.GetRenderPass(); | 345 | const auto renderpass = pipeline.GetRenderPass(); |
| 339 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | 346 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); |
| 340 | scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); | 347 | scheduler.RequestRenderpass(renderpass, framebuffer, render_area); |
| 341 | 348 | ||
| 342 | UpdateDynamicStates(); | 349 | UpdateDynamicStates(); |
| 343 | 350 | ||
| @@ -345,19 +352,19 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 345 | 352 | ||
| 346 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | 353 | if (device.IsNvDeviceDiagnosticCheckpoints()) { |
| 347 | scheduler.Record( | 354 | scheduler.Record( |
| 348 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); | 355 | [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(&pipeline); }); |
| 349 | } | 356 | } |
| 350 | 357 | ||
| 351 | BeginTransformFeedback(); | 358 | BeginTransformFeedback(); |
| 352 | 359 | ||
| 353 | const auto pipeline_layout = pipeline.GetLayout(); | 360 | const auto pipeline_layout = pipeline.GetLayout(); |
| 354 | const auto descriptor_set = pipeline.CommitDescriptorSet(); | 361 | const auto descriptor_set = pipeline.CommitDescriptorSet(); |
| 355 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { | 362 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { |
| 356 | if (descriptor_set) { | 363 | if (descriptor_set) { |
| 357 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, | 364 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, |
| 358 | DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); | 365 | DESCRIPTOR_SET, descriptor_set, {}); |
| 359 | } | 366 | } |
| 360 | draw_params.Draw(cmdbuf, dld); | 367 | draw_params.Draw(cmdbuf); |
| 361 | }); | 368 | }); |
| 362 | 369 | ||
| 363 | EndTransformFeedback(); | 370 | EndTransformFeedback(); |
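SetCheckpointNV wraps vkCmdSetCheckpointNV from VK_NV_device_diagnostic_checkpoints: the driver tags the command stream with an opaque marker, and after a device loss the last markers reached can be read back to locate the faulting work. A sketch of that readback side, which this file does not show:

    u32 count = 0;
    vkGetQueueCheckpointDataNV(queue, &count, nullptr);
    std::vector<VkCheckpointDataNV> data(count);
    for (VkCheckpointDataNV& item : data) {
        item.sType = VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV;
        item.pNext = nullptr;
    }
    vkGetQueueCheckpointDataNV(queue, &count, data.data());
    // Each data[i].pCheckpointMarker is a pointer previously passed to
    // vkCmdSetCheckpointNV; here, that is the pipeline that was executing.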
| @@ -389,48 +396,54 @@ void RasterizerVulkan::Clear() { | |||
| 389 | DEBUG_ASSERT(texceptions.none()); | 396 | DEBUG_ASSERT(texceptions.none()); |
| 390 | SetupImageTransitions(0, color_attachments, zeta_attachment); | 397 | SetupImageTransitions(0, color_attachments, zeta_attachment); |
| 391 | 398 | ||
| 392 | const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); | 399 | const VkRenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); |
| 393 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); | 400 | const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); |
| 394 | scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); | 401 | scheduler.RequestRenderpass(renderpass, framebuffer, render_area); |
| 395 | |||
| 396 | const auto& scissor = regs.scissor_test[0]; | ||
| 397 | const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y); | ||
| 398 | vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y}; | ||
| 399 | scissor_extent.width = std::min(scissor_extent.width, render_area.width); | ||
| 400 | scissor_extent.height = std::min(scissor_extent.height, render_area.height); | ||
| 401 | 402 | ||
| 402 | const u32 layer = regs.clear_buffers.layer; | 403 | VkClearRect clear_rect; |
| 403 | const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); | 404 | clear_rect.baseArrayLayer = regs.clear_buffers.layer; |
| 405 | clear_rect.layerCount = 1; | ||
| 406 | clear_rect.rect = GetScissorState(regs, 0); | ||
| 407 | clear_rect.rect.extent.width = std::min(clear_rect.rect.extent.width, render_area.width); | ||
| 408 | clear_rect.rect.extent.height = std::min(clear_rect.rect.extent.height, render_area.height); | ||
| 404 | 409 | ||
| 405 | if (use_color) { | 410 | if (use_color) { |
| 406 | const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], | 411 | VkClearValue clear_value; |
| 407 | regs.clear_color[2], regs.clear_color[3]}; | 412 | std::memcpy(clear_value.color.float32, regs.clear_color, sizeof(regs.clear_color)); |
| 408 | const vk::ClearValue clear_value{clear_color}; | 413 | |
| 409 | const u32 color_attachment = regs.clear_buffers.RT; | 414 | const u32 color_attachment = regs.clear_buffers.RT; |
| 410 | scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { | 415 | scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { |
| 411 | const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, | 416 | VkClearAttachment attachment; |
| 412 | clear_value); | 417 | attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; |
| 413 | cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); | 418 | attachment.colorAttachment = color_attachment; |
| 419 | attachment.clearValue = clear_value; | ||
| 420 | cmdbuf.ClearAttachments(attachment, clear_rect); | ||
| 414 | }); | 421 | }); |
| 415 | } | 422 | } |
| 416 | 423 | ||
| 417 | if (!use_depth && !use_stencil) { | 424 | if (!use_depth && !use_stencil) { |
| 418 | return; | 425 | return; |
| 419 | } | 426 | } |
| 420 | vk::ImageAspectFlags aspect_flags; | 427 | VkImageAspectFlags aspect_flags = 0; |
| 421 | if (use_depth) { | 428 | if (use_depth) { |
| 422 | aspect_flags |= vk::ImageAspectFlagBits::eDepth; | 429 | aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT; |
| 423 | } | 430 | } |
| 424 | if (use_stencil) { | 431 | if (use_stencil) { |
| 425 | aspect_flags |= vk::ImageAspectFlagBits::eStencil; | 432 | aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; |
| 426 | } | 433 | } |
| 427 | 434 | ||
| 428 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, | 435 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, |
| 429 | clear_rect, aspect_flags](auto cmdbuf, auto& dld) { | 436 | clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { |
| 430 | const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil); | 437 | VkClearValue clear_value; |
| 431 | const vk::ClearValue clear_value{clear_zeta}; | 438 | clear_value.depthStencil.depth = clear_depth; |
| 432 | const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); | 439 | clear_value.depthStencil.stencil = clear_stencil; |
| 433 | cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); | 440 | |
| 441 | VkClearAttachment attachment; | ||
| 442 | attachment.aspectMask = aspect_flags; | ||
| 443 | attachment.colorAttachment = 0; | ||
| 444 | attachment.clearValue.depthStencil.depth = clear_depth; | ||
| 445 | attachment.clearValue.depthStencil.stencil = clear_stencil; | ||
| 446 | cmdbuf.ClearAttachments(attachment, clear_rect); | ||
| 434 | }); | 447 | }); |
| 435 | } | 448 | } |
| 436 | 449 | ||
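ClearAttachments wraps vkCmdClearAttachments, which clears rectangles inside the active render pass with no layout transition, unlike vkCmdClearDepthStencilImage, which must run outside one. The raw equivalent of the depth/stencil path above, as a sketch:

    VkClearAttachment attachment{};
    attachment.aspectMask = aspect_flags; // depth and/or stencil bits
    attachment.clearValue.depthStencil = {clear_depth, clear_stencil};
    vkCmdClearAttachments(cmdbuf, 1, &attachment, 1, &clear_rect);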
| @@ -463,24 +476,24 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 463 | 476 | ||
| 464 | buffer_cache.Unmap(); | 477 | buffer_cache.Unmap(); |
| 465 | 478 | ||
| 466 | TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader, | 479 | TransitionImages(sampled_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 467 | vk::AccessFlagBits::eShaderRead); | 480 | VK_ACCESS_SHADER_READ_BIT); |
| 468 | TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader, | 481 | TransitionImages(image_views, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 469 | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); | 482 | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); |
| 470 | 483 | ||
| 471 | if (device.IsNvDeviceDiagnosticCheckpoints()) { | 484 | if (device.IsNvDeviceDiagnosticCheckpoints()) { |
| 472 | scheduler.Record( | 485 | scheduler.Record( |
| 473 | [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(nullptr, dld); }); | 486 | [&pipeline](vk::CommandBuffer cmdbuf) { cmdbuf.SetCheckpointNV(nullptr); }); |
| 474 | } | 487 | } |
| 475 | 488 | ||
| 476 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, | 489 | scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, |
| 477 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), | 490 | grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), |
| 478 | layout = pipeline.GetLayout(), | 491 | layout = pipeline.GetLayout(), |
| 479 | descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) { | 492 | descriptor_set = pipeline.CommitDescriptorSet()](vk::CommandBuffer cmdbuf) { |
| 480 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld); | 493 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle); |
| 481 | cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1, | 494 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, DESCRIPTOR_SET, |
| 482 | &descriptor_set, 0, nullptr, dld); | 495 | descriptor_set, {}); |
| 483 | cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); | 496 | cmdbuf.Dispatch(grid_x, grid_y, grid_z); |
| 484 | }); | 497 | }); |
| 485 | } | 498 | } |
| 486 | 499 | ||
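The recorded compute lambda reduces to three raw calls, with the grid dimensions taken verbatim from the KeplerCompute launch descriptor. Sketch:

    vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
    vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, layout,
                            DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr);
    vkCmdDispatch(cmdbuf, grid_x, grid_y, grid_z);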
| @@ -495,20 +508,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 495 | 508 | ||
| 496 | void RasterizerVulkan::FlushAll() {} | 509 | void RasterizerVulkan::FlushAll() {} |
| 497 | 510 | ||
| 498 | void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | 511 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { |
| 512 | if (addr == 0 || size == 0) { | ||
| 513 | return; | ||
| 514 | } | ||
| 499 | texture_cache.FlushRegion(addr, size); | 515 | texture_cache.FlushRegion(addr, size); |
| 500 | buffer_cache.FlushRegion(addr, size); | 516 | buffer_cache.FlushRegion(addr, size); |
| 501 | query_cache.FlushRegion(addr, size); | 517 | query_cache.FlushRegion(addr, size); |
| 502 | } | 518 | } |
| 503 | 519 | ||
| 504 | void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | 520 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| 521 | if (addr == 0 || size == 0) { | ||
| 522 | return; | ||
| 523 | } | ||
| 505 | texture_cache.InvalidateRegion(addr, size); | 524 | texture_cache.InvalidateRegion(addr, size); |
| 506 | pipeline_cache.InvalidateRegion(addr, size); | 525 | pipeline_cache.InvalidateRegion(addr, size); |
| 507 | buffer_cache.InvalidateRegion(addr, size); | 526 | buffer_cache.InvalidateRegion(addr, size); |
| 508 | query_cache.InvalidateRegion(addr, size); | 527 | query_cache.InvalidateRegion(addr, size); |
| 509 | } | 528 | } |
| 510 | 529 | ||
| 511 | void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 530 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 512 | FlushRegion(addr, size); | 531 | FlushRegion(addr, size); |
| 513 | InvalidateRegion(addr, size); | 532 | InvalidateRegion(addr, size); |
| 514 | } | 533 | } |
| @@ -540,8 +559,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 540 | return false; | 559 | return false; |
| 541 | } | 560 | } |
| 542 | 561 | ||
| 543 | const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; | 562 | const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; |
| 544 | const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; | ||
| 545 | if (!surface) { | 563 | if (!surface) { |
| 546 | return false; | 564 | return false; |
| 547 | } | 565 | } |
| @@ -594,7 +612,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | |||
| 594 | Texceptions texceptions; | 612 | Texceptions texceptions; |
| 595 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { | 613 | for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { |
| 596 | if (update_rendertargets) { | 614 | if (update_rendertargets) { |
| 597 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); | 615 | color_attachments[rt] = texture_cache.GetColorBufferSurface(rt); |
| 598 | } | 616 | } |
| 599 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { | 617 | if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { |
| 600 | texceptions[rt] = true; | 618 | texceptions[rt] = true; |
| @@ -602,7 +620,7 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { | |||
| 602 | } | 620 | } |
| 603 | 621 | ||
| 604 | if (update_rendertargets) { | 622 | if (update_rendertargets) { |
| 605 | zeta_attachment = texture_cache.GetDepthBufferSurface(true); | 623 | zeta_attachment = texture_cache.GetDepthBufferSurface(); |
| 606 | } | 624 | } |
| 607 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { | 625 | if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { |
| 608 | texceptions[ZETA_TEXCEPTION_INDEX] = true; | 626 | texceptions[ZETA_TEXCEPTION_INDEX] = true; |
| @@ -620,13 +638,13 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen | |||
| 620 | continue; | 638 | continue; |
| 621 | } | 639 | } |
| 622 | overlap = true; | 640 | overlap = true; |
| 623 | *layout = vk::ImageLayout::eGeneral; | 641 | *layout = VK_IMAGE_LAYOUT_GENERAL; |
| 624 | } | 642 | } |
| 625 | return overlap; | 643 | return overlap; |
| 626 | } | 644 | } |
| 627 | 645 | ||
| 628 | std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( | 646 | std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( |
| 629 | vk::RenderPass renderpass) { | 647 | VkRenderPass renderpass) { |
| 630 | FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), | 648 | FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), |
| 631 | std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; | 649 | std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; |
| 632 | 650 | ||
| @@ -653,15 +671,20 @@ std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffer | |||
| 653 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); | 671 | const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); |
| 654 | auto& framebuffer = fbentry->second; | 672 | auto& framebuffer = fbentry->second; |
| 655 | if (is_cache_miss) { | 673 | if (is_cache_miss) { |
| 656 | const vk::FramebufferCreateInfo framebuffer_ci( | 674 | VkFramebufferCreateInfo framebuffer_ci; |
| 657 | {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width, | 675 | framebuffer_ci.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; |
| 658 | key.height, key.layers); | 676 | framebuffer_ci.pNext = nullptr; |
| 659 | const auto dev = device.GetLogical(); | 677 | framebuffer_ci.flags = 0; |
| 660 | const auto& dld = device.GetDispatchLoader(); | 678 | framebuffer_ci.renderPass = key.renderpass; |
| 661 | framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); | 679 | framebuffer_ci.attachmentCount = static_cast<u32>(key.views.size()); |
| 662 | } | 680 | framebuffer_ci.pAttachments = key.views.data(); |
| 663 | 681 | framebuffer_ci.width = key.width; | |
| 664 | return {*framebuffer, vk::Extent2D{key.width, key.height}}; | 682 | framebuffer_ci.height = key.height; |
| 683 | framebuffer_ci.layers = key.layers; | ||
| 684 | framebuffer = device.GetLogical().CreateFramebuffer(framebuffer_ci); | ||
| 685 | } | ||
| 686 | |||
| 687 | return {*framebuffer, VkExtent2D{key.width, key.height}}; | ||
| 665 | } | 688 | } |
| 666 | 689 | ||
| 667 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | 690 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, |
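Unlike the discarded vk::FramebufferCreateInfo constructor, the raw C struct is never implicitly zeroed, hence the explicit sType/pNext/flags stores above. A C++20 alternative, shown only as a sketch of the same initialization (not the style this codebase adopts), zeroes unnamed members automatically:

    const VkFramebufferCreateInfo framebuffer_ci{
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .renderPass = key.renderpass,
        .attachmentCount = static_cast<u32>(key.views.size()),
        .pAttachments = key.views.data(),
        .width = key.width,
        .height = key.height,
        .layers = key.layers,
    };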
| @@ -709,10 +732,9 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
| 709 | void RasterizerVulkan::SetupImageTransitions( | 732 | void RasterizerVulkan::SetupImageTransitions( |
| 710 | Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | 733 | Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, |
| 711 | const View& zeta_attachment) { | 734 | const View& zeta_attachment) { |
| 712 | TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, | 735 | TransitionImages(sampled_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_ACCESS_SHADER_READ_BIT); |
| 713 | vk::AccessFlagBits::eShaderRead); | 736 | TransitionImages(image_views, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, |
| 714 | TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, | 737 | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT); |
| 715 | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); | ||
| 716 | 738 | ||
| 717 | for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { | 739 | for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { |
| 718 | const auto color_attachment = color_attachments[rt]; | 740 | const auto color_attachment = color_attachments[rt]; |
| @@ -720,19 +742,19 @@ void RasterizerVulkan::SetupImageTransitions( | |||
| 720 | continue; | 742 | continue; |
| 721 | } | 743 | } |
| 722 | const auto image_layout = | 744 | const auto image_layout = |
| 723 | texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; | 745 | texceptions[rt] ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; |
| 724 | color_attachment->Transition( | 746 | color_attachment->Transition(image_layout, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| 725 | image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, | 747 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | |
| 726 | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); | 748 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); |
| 727 | } | 749 | } |
| 728 | 750 | ||
| 729 | if (zeta_attachment != nullptr) { | 751 | if (zeta_attachment != nullptr) { |
| 730 | const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] | 752 | const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] |
| 731 | ? vk::ImageLayout::eGeneral | 753 | ? VK_IMAGE_LAYOUT_GENERAL |
| 732 | : vk::ImageLayout::eDepthStencilAttachmentOptimal; | 754 | : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; |
| 733 | zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, | 755 | zeta_attachment->Transition(image_layout, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, |
| 734 | vk::AccessFlagBits::eDepthStencilAttachmentRead | | 756 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | |
| 735 | vk::AccessFlagBits::eDepthStencilAttachmentWrite); | 757 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT); |
| 736 | } | 758 | } |
| 737 | } | 759 | } |
| 738 | 760 | ||
| @@ -768,9 +790,9 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
| 768 | const std::size_t size = binding.buffer_size; | 790 | const std::size_t size = binding.buffer_size; |
| 769 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | 791 | const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); |
| 770 | 792 | ||
| 771 | scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) { | 793 | scheduler.Record([buffer = *buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { |
| 772 | cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld); | 794 | cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); |
| 773 | cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld); | 795 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); |
| 774 | }); | 796 | }); |
| 775 | } | 797 | } |
| 776 | 798 | ||
| @@ -781,7 +803,7 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 781 | } | 803 | } |
| 782 | 804 | ||
| 783 | scheduler.Record( | 805 | scheduler.Record( |
| 784 | [](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); }); | 806 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 785 | } | 807 | } |
| 786 | 808 | ||
| 787 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, | 809 | void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, |
| @@ -832,7 +854,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
| 832 | } else { | 854 | } else { |
| 833 | const auto [buffer, offset] = | 855 | const auto [buffer, offset] = |
| 834 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | 856 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); |
| 835 | buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); | 857 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); |
| 836 | params.base_vertex = 0; | 858 | params.base_vertex = 0; |
| 837 | params.num_vertices = params.num_vertices * 6 / 4; | 859 | params.num_vertices = params.num_vertices * 6 / 4; |
| 838 | params.is_indexed = true; | 860 | params.is_indexed = true; |
| @@ -1017,7 +1039,7 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu | |||
| 1017 | update_descriptor_queue.AddSampledImage(sampler, image_view); | 1039 | update_descriptor_queue.AddSampledImage(sampler, image_view); |
| 1018 | 1040 | ||
| 1019 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | 1041 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); |
| 1020 | *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; | 1042 | *image_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; |
| 1021 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | 1043 | sampled_views.push_back(ImageView{std::move(view), image_layout}); |
| 1022 | } | 1044 | } |
| 1023 | 1045 | ||
| @@ -1034,7 +1056,7 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima | |||
| 1034 | update_descriptor_queue.AddImage(image_view); | 1056 | update_descriptor_queue.AddImage(image_view); |
| 1035 | 1057 | ||
| 1036 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | 1058 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); |
| 1037 | *image_layout = vk::ImageLayout::eGeneral; | 1059 | *image_layout = VK_IMAGE_LAYOUT_GENERAL; |
| 1038 | image_views.push_back(ImageView{std::move(view), image_layout}); | 1060 | image_views.push_back(ImageView{std::move(view), image_layout}); |
| 1039 | } | 1061 | } |
| 1040 | 1062 | ||
| @@ -1051,9 +1073,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 1051 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | 1073 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), |
| 1052 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | 1074 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), |
| 1053 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; | 1075 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; |
| 1054 | scheduler.Record([viewports](auto cmdbuf, auto& dld) { | 1076 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); |
| 1055 | cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld); | ||
| 1056 | }); | ||
| 1057 | } | 1077 | } |
| 1058 | 1078 | ||
| 1059 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 1079 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| @@ -1067,9 +1087,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 1067 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), | 1087 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), |
| 1068 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | 1088 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), |
| 1069 | GetScissorState(regs, 15)}; | 1089 | GetScissorState(regs, 15)}; |
| 1070 | scheduler.Record([scissors](auto cmdbuf, auto& dld) { | 1090 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); |
| 1071 | cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld); | ||
| 1072 | }); | ||
| 1073 | } | 1091 | } |
| 1074 | 1092 | ||
| 1075 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { | 1093 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { |
| @@ -1077,8 +1095,8 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { | |||
| 1077 | return; | 1095 | return; |
| 1078 | } | 1096 | } |
| 1079 | scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, | 1097 | scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, |
| 1080 | factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { | 1098 | factor = regs.polygon_offset_factor](vk::CommandBuffer cmdbuf) { |
| 1081 | cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); | 1099 | cmdbuf.SetDepthBias(constant, clamp, factor / 2.0f); |
| 1082 | }); | 1100 | }); |
| 1083 | } | 1101 | } |
| 1084 | 1102 | ||
| @@ -1088,9 +1106,8 @@ void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 1088 | } | 1106 | } |
| 1089 | const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, | 1107 | const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, |
| 1090 | regs.blend_color.a}; | 1108 | regs.blend_color.a}; |
| 1091 | scheduler.Record([blend_color](auto cmdbuf, auto& dld) { | 1109 | scheduler.Record( |
| 1092 | cmdbuf.setBlendConstants(blend_color.data(), dld); | 1110 | [blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); }); |
| 1093 | }); | ||
| 1094 | } | 1111 | } |
| 1095 | 1112 | ||
| 1096 | void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) { | 1113 | void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) { |
| @@ -1098,7 +1115,7 @@ void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) | |||
| 1098 | return; | 1115 | return; |
| 1099 | } | 1116 | } |
| 1100 | scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( | 1117 | scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( |
| 1101 | auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); | 1118 | vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); }); |
| 1102 | } | 1119 | } |
| 1103 | 1120 | ||
| 1104 | void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) { | 1121 | void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) { |
| @@ -1111,24 +1128,24 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) | |||
| 1111 | [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, | 1128 | [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, |
| 1112 | front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, | 1129 | front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, |
| 1113 | back_write_mask = regs.stencil_back_mask, | 1130 | back_write_mask = regs.stencil_back_mask, |
| 1114 | back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { | 1131 | back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { |
| 1115 | // Front face | 1132 | // Front face |
| 1116 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); | 1133 | cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref); |
| 1117 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); | 1134 | cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask); |
| 1118 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); | 1135 | cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask); |
| 1119 | 1136 | ||
| 1120 | // Back face | 1137 | // Back face |
| 1121 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); | 1138 | cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref); |
| 1122 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); | 1139 | cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask); |
| 1123 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); | 1140 | cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask); |
| 1124 | }); | 1141 | }); |
| 1125 | } else { | 1142 | } else { |
| 1126 | // Front face defines both faces | 1143 | // Front face defines both faces |
| 1127 | scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, | 1144 | scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, |
| 1128 | test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { | 1145 | test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) { |
| 1129 | cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); | 1146 | cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref); |
| 1130 | cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); | 1147 | cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask); |
| 1131 | cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); | 1148 | cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask); |
| 1132 | }); | 1149 | }); |
| 1133 | } | 1150 | } |
| 1134 | } | 1151 | } |
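(Editorial note: the dynamic-state hunks above all follow one migration pattern — the generic `(auto cmdbuf, auto& dld)` lambdas become lambdas taking the new `vk::CommandBuffer` wrapper, so register state is captured by value when `Record` is called and translated into Vulkan calls later on the worker thread. Below is a minimal sketch of that deferred-recording idiom; `CommandStream` and the free-standing `UpdateDepthBias` are hypothetical stand-ins for `VKScheduler::Record` and the rasterizer method above, and a plain `std::function` substitutes for the chunked type-erased commands the real scheduler uses.)

```cpp
#include <functional>
#include <utility>
#include <vector>
#include <vulkan/vulkan.h>

class CommandStream {
public:
    // Capture the state by value now; translate it into Vulkan calls later.
    template <typename F>
    void Record(F&& func) {
        commands.emplace_back(std::forward<F>(func));
    }

    // Replayed once a real command buffer is available (on a worker thread
    // in the scheduler's case).
    void ExecuteAll(VkCommandBuffer cmdbuf) {
        for (const auto& command : commands) {
            command(cmdbuf);
        }
        commands.clear();
    }

private:
    std::vector<std::function<void(VkCommandBuffer)>> commands;
};

void UpdateDepthBias(CommandStream& stream, float units, float clamp, float factor) {
    stream.Record([units, clamp, factor](VkCommandBuffer cmdbuf) {
        vkCmdSetDepthBias(cmdbuf, units, clamp, factor / 2.0f);
    });
}
```

The real `CommandChunk` further down avoids `std::function`'s per-command heap allocation by placement-constructing typed commands into a fixed buffer, but the capture-now/execute-later contract is the same.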
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3185868e9..46037860a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include "video_core/memory_manager.h" | 17 | #include "video_core/memory_manager.h" |
| 18 | #include "video_core/rasterizer_accelerated.h" | 18 | #include "video_core/rasterizer_accelerated.h" |
| 19 | #include "video_core/rasterizer_interface.h" | 19 | #include "video_core/rasterizer_interface.h" |
| 20 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 21 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 22 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 23 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 22 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -32,6 +31,7 @@ | |||
| 32 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 31 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 33 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 32 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 34 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 33 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 34 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 35 | 35 | ||
| 36 | namespace Core { | 36 | namespace Core { |
| 37 | class System; | 37 | class System; |
| @@ -49,11 +49,10 @@ namespace Vulkan { | |||
| 49 | 49 | ||
| 50 | struct VKScreenInfo; | 50 | struct VKScreenInfo; |
| 51 | 51 | ||
| 52 | using ImageViewsPack = | 52 | using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>; |
| 53 | boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>; | ||
| 54 | 53 | ||
| 55 | struct FramebufferCacheKey { | 54 | struct FramebufferCacheKey { |
| 56 | vk::RenderPass renderpass{}; | 55 | VkRenderPass renderpass{}; |
| 57 | u32 width = 0; | 56 | u32 width = 0; |
| 58 | u32 height = 0; | 57 | u32 height = 0; |
| 59 | u32 layers = 0; | 58 | u32 layers = 0; |
| @@ -101,7 +100,7 @@ class BufferBindings; | |||
| 101 | 100 | ||
| 102 | struct ImageView { | 101 | struct ImageView { |
| 103 | View view; | 102 | View view; |
| 104 | vk::ImageLayout* layout = nullptr; | 103 | VkImageLayout* layout = nullptr; |
| 105 | }; | 104 | }; |
| 106 | 105 | ||
| 107 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | 106 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { |
| @@ -118,9 +117,9 @@ public: | |||
| 118 | void ResetCounter(VideoCore::QueryType type) override; | 117 | void ResetCounter(VideoCore::QueryType type) override; |
| 119 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 118 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 120 | void FlushAll() override; | 119 | void FlushAll() override; |
| 121 | void FlushRegion(CacheAddr addr, u64 size) override; | 120 | void FlushRegion(VAddr addr, u64 size) override; |
| 122 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 121 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 123 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 122 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 124 | void FlushCommands() override; | 123 | void FlushCommands() override; |
| 125 | void TickFrame() override; | 124 | void TickFrame() override; |
| 126 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 125 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| @@ -137,7 +136,7 @@ public: | |||
| 137 | 136 | ||
| 138 | private: | 137 | private: |
| 139 | struct DrawParameters { | 138 | struct DrawParameters { |
| 140 | void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const; | 139 | void Draw(vk::CommandBuffer cmdbuf) const; |
| 141 | 140 | ||
| 142 | u32 base_instance = 0; | 141 | u32 base_instance = 0; |
| 143 | u32 num_instances = 0; | 142 | u32 num_instances = 0; |
| @@ -154,7 +153,7 @@ private: | |||
| 154 | 153 | ||
| 155 | Texceptions UpdateAttachments(); | 154 | Texceptions UpdateAttachments(); |
| 156 | 155 | ||
| 157 | std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass); | 156 | std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); |
| 158 | 157 | ||
| 159 | /// Sets up geometry buffers and state. | 158 | /// Sets up geometry buffers and state. |
| 160 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | 159 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, |
| @@ -272,7 +271,7 @@ private: | |||
| 272 | u32 draw_counter = 0; | 271 | u32 draw_counter = 0; |
| 273 | 272 | ||
| 274 | // TODO(Rodrigo): Invalidate on image destruction | 273 | // TODO(Rodrigo): Invalidate on image destruction |
| 275 | std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache; | 274 | std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache; |
| 276 | }; | 275 | }; |
| 277 | 276 | ||
| 278 | } // namespace Vulkan | 277 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 93f5d7ba0..4e5286a69 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp | |||
| @@ -6,10 +6,10 @@ | |||
| 6 | #include <vector> | 6 | #include <vector> |
| 7 | 7 | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 9 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | 10 | #include "video_core/renderer_vulkan/vk_device.h" |
| 12 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 11 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| @@ -17,7 +17,7 @@ VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} | |||
| 17 | 17 | ||
| 18 | VKRenderPassCache::~VKRenderPassCache() = default; | 18 | VKRenderPassCache::~VKRenderPassCache() = default; |
| 19 | 19 | ||
| 20 | vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { | 20 | VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { |
| 21 | const auto [pair, is_cache_miss] = cache.try_emplace(params); | 21 | const auto [pair, is_cache_miss] = cache.try_emplace(params); |
| 22 | auto& entry = pair->second; | 22 | auto& entry = pair->second; |
| 23 | if (is_cache_miss) { | 23 | if (is_cache_miss) { |
| @@ -26,9 +26,9 @@ vk::RenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) | |||
| 26 | return *entry; | 26 | return *entry; |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { | 29 | vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { |
| 30 | std::vector<vk::AttachmentDescription> descriptors; | 30 | std::vector<VkAttachmentDescription> descriptors; |
| 31 | std::vector<vk::AttachmentReference> color_references; | 31 | std::vector<VkAttachmentReference> color_references; |
| 32 | 32 | ||
| 33 | for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { | 33 | for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { |
| 34 | const auto attachment = params.color_attachments[rt]; | 34 | const auto attachment = params.color_attachments[rt]; |
| @@ -39,16 +39,25 @@ UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& par | |||
| 39 | 39 | ||
| 40 | // TODO(Rodrigo): Add eMayAlias when it's needed. | 40 | // TODO(Rodrigo): Add eMayAlias when it's needed. |
| 41 | const auto color_layout = attachment.is_texception | 41 | const auto color_layout = attachment.is_texception |
| 42 | ? vk::ImageLayout::eGeneral | 42 | ? VK_IMAGE_LAYOUT_GENERAL |
| 43 | : vk::ImageLayout::eColorAttachmentOptimal; | 43 | : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; |
| 44 | descriptors.emplace_back(vk::AttachmentDescriptionFlagBits::eMayAlias, format.format, | 44 | VkAttachmentDescription& descriptor = descriptors.emplace_back(); |
| 45 | vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, | 45 | descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; |
| 46 | vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, | 46 | descriptor.format = format.format; |
| 47 | vk::AttachmentStoreOp::eDontCare, color_layout, color_layout); | 47 | descriptor.samples = VK_SAMPLE_COUNT_1_BIT; |
| 48 | color_references.emplace_back(static_cast<u32>(rt), color_layout); | 48 | descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; |
| 49 | descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; | ||
| 50 | descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; | ||
| 51 | descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; | ||
| 52 | descriptor.initialLayout = color_layout; | ||
| 53 | descriptor.finalLayout = color_layout; | ||
| 54 | |||
| 55 | VkAttachmentReference& reference = color_references.emplace_back(); | ||
| 56 | reference.attachment = static_cast<u32>(rt); | ||
| 57 | reference.layout = color_layout; | ||
| 49 | } | 58 | } |
| 50 | 59 | ||
| 51 | vk::AttachmentReference zeta_attachment_ref; | 60 | VkAttachmentReference zeta_attachment_ref; |
| 52 | if (params.has_zeta) { | 61 | if (params.has_zeta) { |
| 53 | const auto format = | 62 | const auto format = |
| 54 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); | 63 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); |
| @@ -56,45 +65,68 @@ UniqueRenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& par | |||
| 56 | static_cast<u32>(params.zeta_pixel_format)); | 65 | static_cast<u32>(params.zeta_pixel_format)); |
| 57 | 66 | ||
| 58 | const auto zeta_layout = params.zeta_texception | 67 | const auto zeta_layout = params.zeta_texception |
| 59 | ? vk::ImageLayout::eGeneral | 68 | ? VK_IMAGE_LAYOUT_GENERAL |
| 60 | : vk::ImageLayout::eDepthStencilAttachmentOptimal; | 69 | : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; |
| 61 | descriptors.emplace_back(vk::AttachmentDescriptionFlags{}, format.format, | 70 | VkAttachmentDescription& descriptor = descriptors.emplace_back(); |
| 62 | vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eLoad, | 71 | descriptor.flags = 0; |
| 63 | vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eLoad, | 72 | descriptor.format = format.format; |
| 64 | vk::AttachmentStoreOp::eStore, zeta_layout, zeta_layout); | 73 | descriptor.samples = VK_SAMPLE_COUNT_1_BIT; |
| 65 | zeta_attachment_ref = | 74 | descriptor.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; |
| 66 | vk::AttachmentReference(static_cast<u32>(params.color_attachments.size()), zeta_layout); | 75 | descriptor.storeOp = VK_ATTACHMENT_STORE_OP_STORE; |
| 76 | descriptor.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; | ||
| 77 | descriptor.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; | ||
| 78 | descriptor.initialLayout = zeta_layout; | ||
| 79 | descriptor.finalLayout = zeta_layout; | ||
| 80 | |||
| 81 | zeta_attachment_ref.attachment = static_cast<u32>(params.color_attachments.size()); | ||
| 82 | zeta_attachment_ref.layout = zeta_layout; | ||
| 67 | } | 83 | } |
| 68 | 84 | ||
| 69 | const vk::SubpassDescription subpass_description( | 85 | VkSubpassDescription subpass_description; |
| 70 | {}, vk::PipelineBindPoint::eGraphics, 0, nullptr, static_cast<u32>(color_references.size()), | 86 | subpass_description.flags = 0; |
| 71 | color_references.data(), nullptr, params.has_zeta ? &zeta_attachment_ref : nullptr, 0, | 87 | subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; |
| 72 | nullptr); | 88 | subpass_description.inputAttachmentCount = 0; |
| 73 | 89 | subpass_description.pInputAttachments = nullptr; | |
| 74 | vk::AccessFlags access; | 90 | subpass_description.colorAttachmentCount = static_cast<u32>(color_references.size()); |
| 75 | vk::PipelineStageFlags stage; | 91 | subpass_description.pColorAttachments = color_references.data(); |
| 92 | subpass_description.pResolveAttachments = nullptr; | ||
| 93 | subpass_description.pDepthStencilAttachment = params.has_zeta ? &zeta_attachment_ref : nullptr; | ||
| 94 | subpass_description.preserveAttachmentCount = 0; | ||
| 95 | subpass_description.pPreserveAttachments = nullptr; | ||
| 96 | |||
| 97 | VkAccessFlags access = 0; | ||
| 98 | VkPipelineStageFlags stage = 0; | ||
| 76 | if (!color_references.empty()) { | 99 | if (!color_references.empty()) { |
| 77 | access |= | 100 | access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; |
| 78 | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite; | 101 | stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; |
| 79 | stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput; | ||
| 80 | } | 102 | } |
| 81 | 103 | ||
| 82 | if (params.has_zeta) { | 104 | if (params.has_zeta) { |
| 83 | access |= vk::AccessFlagBits::eDepthStencilAttachmentRead | | 105 | access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | |
| 84 | vk::AccessFlagBits::eDepthStencilAttachmentWrite; | 106 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; |
| 85 | stage |= vk::PipelineStageFlagBits::eLateFragmentTests; | 107 | stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; |
| 86 | } | 108 | } |
| 87 | 109 | ||
| 88 | const vk::SubpassDependency subpass_dependency(VK_SUBPASS_EXTERNAL, 0, stage, stage, {}, access, | 110 | VkSubpassDependency subpass_dependency; |
| 89 | {}); | 111 | subpass_dependency.srcSubpass = VK_SUBPASS_EXTERNAL; |
| 90 | 112 | subpass_dependency.dstSubpass = 0; | |
| 91 | const vk::RenderPassCreateInfo create_info({}, static_cast<u32>(descriptors.size()), | 113 | subpass_dependency.srcStageMask = stage; |
| 92 | descriptors.data(), 1, &subpass_description, 1, | 114 | subpass_dependency.dstStageMask = stage; |
| 93 | &subpass_dependency); | 115 | subpass_dependency.srcAccessMask = 0; |
| 94 | 116 | subpass_dependency.dstAccessMask = access; | |
| 95 | const auto dev = device.GetLogical(); | 117 | subpass_dependency.dependencyFlags = 0; |
| 96 | const auto& dld = device.GetDispatchLoader(); | 118 | |
| 97 | return dev.createRenderPassUnique(create_info, nullptr, dld); | 119 | VkRenderPassCreateInfo ci; |
| 120 | ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; | ||
| 121 | ci.pNext = nullptr; | ||
| 122 | ci.flags = 0; | ||
| 123 | ci.attachmentCount = static_cast<u32>(descriptors.size()); | ||
| 124 | ci.pAttachments = descriptors.data(); | ||
| 125 | ci.subpassCount = 1; | ||
| 126 | ci.pSubpasses = &subpass_description; | ||
| 127 | ci.dependencyCount = 1; | ||
| 128 | ci.pDependencies = &subpass_dependency; | ||
| 129 | return device.GetLogical().CreateRenderPass(ci); | ||
| 98 | } | 130 | } |
| 99 | 131 | ||
| 100 | } // namespace Vulkan | 132 | } // namespace Vulkan |
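(Editorial note: without Vulkan-Hpp's positional constructors, every `Vk*` struct above is filled field by field; since locals such as `subpass_description` are declared without an initializer, nothing is zero-initialized and each member has to be written explicitly. As a hedged, condensed reference, the same single-subpass recipe for one color attachment looks like this when the structs are value-initialized with `{}` instead — a hypothetical helper, not code from the commit.)

```cpp
#include <vulkan/vulkan.h>

// Minimal single-subpass render pass with one color attachment; {} zeroes
// every field so only the meaningful ones need explicit assignment.
VkRenderPass BuildColorOnlyRenderPass(VkDevice device, VkFormat color_format) {
    VkAttachmentDescription color{};
    color.format = color_format;
    color.samples = VK_SAMPLE_COUNT_1_BIT;
    color.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    color.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    color.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
    color.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    color.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    color.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

    VkAttachmentReference color_ref{};
    color_ref.attachment = 0;
    color_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

    VkSubpassDescription subpass{};
    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    subpass.colorAttachmentCount = 1;
    subpass.pColorAttachments = &color_ref;

    VkRenderPassCreateInfo ci{};
    ci.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
    ci.attachmentCount = 1;
    ci.pAttachments = &color;
    ci.subpassCount = 1;
    ci.pSubpasses = &subpass;

    VkRenderPass render_pass = VK_NULL_HANDLE;
    if (vkCreateRenderPass(device, &ci, nullptr, &render_pass) != VK_SUCCESS) {
        return VK_NULL_HANDLE;
    }
    return render_pass;
}
```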
diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index b49b2db48..921b6efb5 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include <boost/functional/hash.hpp> | 12 | #include <boost/functional/hash.hpp> |
| 13 | 13 | ||
| 14 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/engines/maxwell_3d.h" |
| 15 | #include "video_core/renderer_vulkan/declarations.h" | 15 | #include "video_core/renderer_vulkan/wrapper.h" |
| 16 | #include "video_core/surface.h" | 16 | #include "video_core/surface.h" |
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| @@ -85,13 +85,13 @@ public: | |||
| 85 | explicit VKRenderPassCache(const VKDevice& device); | 85 | explicit VKRenderPassCache(const VKDevice& device); |
| 86 | ~VKRenderPassCache(); | 86 | ~VKRenderPassCache(); |
| 87 | 87 | ||
| 88 | vk::RenderPass GetRenderPass(const RenderPassParams& params); | 88 | VkRenderPass GetRenderPass(const RenderPassParams& params); |
| 89 | 89 | ||
| 90 | private: | 90 | private: |
| 91 | UniqueRenderPass CreateRenderPass(const RenderPassParams& params) const; | 91 | vk::RenderPass CreateRenderPass(const RenderPassParams& params) const; |
| 92 | 92 | ||
| 93 | const VKDevice& device; | 93 | const VKDevice& device; |
| 94 | std::unordered_map<RenderPassParams, UniqueRenderPass> cache; | 94 | std::unordered_map<RenderPassParams, vk::RenderPass> cache; |
| 95 | }; | 95 | }; |
| 96 | 96 | ||
| 97 | } // namespace Vulkan | 97 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp index 525b4bb46..dc06f545a 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_resource_manager.cpp | |||
| @@ -6,83 +6,83 @@ | |||
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_device.h" | 9 | #include "video_core/renderer_vulkan/vk_device.h" |
| 11 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 10 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 11 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 12 | 12 | ||
| 13 | namespace Vulkan { | 13 | namespace Vulkan { |
| 14 | 14 | ||
| 15 | namespace { | ||
| 16 | |||
| 15 | // TODO(Rodrigo): Fine tune these numbers. | 17 | // TODO(Rodrigo): Fine tune these numbers. |
| 16 | constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; | 18 | constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; |
| 17 | constexpr std::size_t FENCES_GROW_STEP = 0x40; | 19 | constexpr std::size_t FENCES_GROW_STEP = 0x40; |
| 18 | 20 | ||
| 21 | VkFenceCreateInfo BuildFenceCreateInfo() { | ||
| 22 | VkFenceCreateInfo fence_ci; | ||
| 23 | fence_ci.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; | ||
| 24 | fence_ci.pNext = nullptr; | ||
| 25 | fence_ci.flags = 0; | ||
| 26 | return fence_ci; | ||
| 27 | } | ||
| 28 | |||
| 29 | } // Anonymous namespace | ||
| 30 | |||
| 19 | class CommandBufferPool final : public VKFencedPool { | 31 | class CommandBufferPool final : public VKFencedPool { |
| 20 | public: | 32 | public: |
| 21 | CommandBufferPool(const VKDevice& device) | 33 | CommandBufferPool(const VKDevice& device) |
| 22 | : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} | 34 | : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} |
| 23 | 35 | ||
| 24 | void Allocate(std::size_t begin, std::size_t end) override { | 36 | void Allocate(std::size_t begin, std::size_t end) override { |
| 25 | const auto dev = device.GetLogical(); | ||
| 26 | const auto& dld = device.GetDispatchLoader(); | ||
| 27 | const u32 graphics_family = device.GetGraphicsFamily(); | ||
| 28 | |||
| 29 | auto pool = std::make_unique<Pool>(); | ||
| 30 | |||
| 31 | // Command buffers are going to be committed, recorded, executed every single usage cycle. | 37 | // Command buffers are going to be committed, recorded, executed every single usage cycle. |
| 32 | // They are also going to be reset when committed. | 38 | // They are also going to be reset when committed. |
| 33 | const auto pool_flags = vk::CommandPoolCreateFlagBits::eTransient | | 39 | VkCommandPoolCreateInfo command_pool_ci; |
| 34 | vk::CommandPoolCreateFlagBits::eResetCommandBuffer; | 40 | command_pool_ci.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; |
| 35 | const vk::CommandPoolCreateInfo cmdbuf_pool_ci(pool_flags, graphics_family); | 41 | command_pool_ci.pNext = nullptr; |
| 36 | pool->handle = dev.createCommandPoolUnique(cmdbuf_pool_ci, nullptr, dld); | 42 | command_pool_ci.flags = |
| 37 | 43 | VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; | |
| 38 | const vk::CommandBufferAllocateInfo cmdbuf_ai(*pool->handle, | 44 | command_pool_ci.queueFamilyIndex = device.GetGraphicsFamily(); |
| 39 | vk::CommandBufferLevel::ePrimary, | 45 | |
| 40 | static_cast<u32>(COMMAND_BUFFER_POOL_SIZE)); | 46 | Pool& pool = pools.emplace_back(); |
| 41 | pool->cmdbufs = | 47 | pool.handle = device.GetLogical().CreateCommandPool(command_pool_ci); |
| 42 | dev.allocateCommandBuffersUnique<std::allocator<UniqueCommandBuffer>>(cmdbuf_ai, dld); | 48 | pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); |
| 43 | |||
| 44 | pools.push_back(std::move(pool)); | ||
| 45 | } | 49 | } |
| 46 | 50 | ||
| 47 | vk::CommandBuffer Commit(VKFence& fence) { | 51 | VkCommandBuffer Commit(VKFence& fence) { |
| 48 | const std::size_t index = CommitResource(fence); | 52 | const std::size_t index = CommitResource(fence); |
| 49 | const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; | 53 | const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; |
| 50 | const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; | 54 | const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; |
| 51 | return *pools[pool_index]->cmdbufs[sub_index]; | 55 | return pools[pool_index].cmdbufs[sub_index]; |
| 52 | } | 56 | } |
| 53 | 57 | ||
| 54 | private: | 58 | private: |
| 55 | struct Pool { | 59 | struct Pool { |
| 56 | UniqueCommandPool handle; | 60 | vk::CommandPool handle; |
| 57 | std::vector<UniqueCommandBuffer> cmdbufs; | 61 | vk::CommandBuffers cmdbufs; |
| 58 | }; | 62 | }; |
| 59 | 63 | ||
| 60 | const VKDevice& device; | 64 | const VKDevice& device; |
| 61 | 65 | std::vector<Pool> pools; | |
| 62 | std::vector<std::unique_ptr<Pool>> pools; | ||
| 63 | }; | 66 | }; |
| 64 | 67 | ||
| 65 | VKResource::VKResource() = default; | 68 | VKResource::VKResource() = default; |
| 66 | 69 | ||
| 67 | VKResource::~VKResource() = default; | 70 | VKResource::~VKResource() = default; |
| 68 | 71 | ||
| 69 | VKFence::VKFence(const VKDevice& device, UniqueFence handle) | 72 | VKFence::VKFence(const VKDevice& device) |
| 70 | : device{device}, handle{std::move(handle)} {} | 73 | : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {} |
| 71 | 74 | ||
| 72 | VKFence::~VKFence() = default; | 75 | VKFence::~VKFence() = default; |
| 73 | 76 | ||
| 74 | void VKFence::Wait() { | 77 | void VKFence::Wait() { |
| 75 | static constexpr u64 timeout = std::numeric_limits<u64>::max(); | 78 | switch (const VkResult result = handle.Wait()) { |
| 76 | const auto dev = device.GetLogical(); | 79 | case VK_SUCCESS: |
| 77 | const auto& dld = device.GetDispatchLoader(); | ||
| 78 | switch (const auto result = dev.waitForFences(1, &*handle, true, timeout, dld)) { | ||
| 79 | case vk::Result::eSuccess: | ||
| 80 | return; | 80 | return; |
| 81 | case vk::Result::eErrorDeviceLost: | 81 | case VK_ERROR_DEVICE_LOST: |
| 82 | device.ReportLoss(); | 82 | device.ReportLoss(); |
| 83 | [[fallthrough]]; | 83 | [[fallthrough]]; |
| 84 | default: | 84 | default: |
| 85 | vk::throwResultException(result, "vk::waitForFences"); | 85 | throw vk::Exception(result); |
| 86 | } | 86 | } |
| 87 | } | 87 | } |
| 88 | 88 | ||
| @@ -107,13 +107,11 @@ bool VKFence::Tick(bool gpu_wait, bool owner_wait) { | |||
| 107 | return false; | 107 | return false; |
| 108 | } | 108 | } |
| 109 | 109 | ||
| 110 | const auto dev = device.GetLogical(); | ||
| 111 | const auto& dld = device.GetDispatchLoader(); | ||
| 112 | if (gpu_wait) { | 110 | if (gpu_wait) { |
| 113 | // Wait for the fence if it has been requested. | 111 | // Wait for the fence if it has been requested. |
| 114 | dev.waitForFences({*handle}, true, std::numeric_limits<u64>::max(), dld); | 112 | (void)handle.Wait(); |
| 115 | } else { | 113 | } else { |
| 116 | if (dev.getFenceStatus(*handle, dld) != vk::Result::eSuccess) { | 114 | if (handle.GetStatus() != VK_SUCCESS) { |
| 117 | // Vulkan fence is not ready; there is not much we can do here | 115 | // Vulkan fence is not ready; there is not much we can do here |
| 118 | return false; | 116 | return false; |
| 119 | } | 117 | } |
| @@ -126,7 +124,7 @@ bool VKFence::Tick(bool gpu_wait, bool owner_wait) { | |||
| 126 | protected_resources.clear(); | 124 | protected_resources.clear(); |
| 127 | 125 | ||
| 128 | // Prepare fence for reusage. | 126 | // Prepare fence for reusage. |
| 129 | dev.resetFences({*handle}, dld); | 127 | handle.Reset(); |
| 130 | is_used = false; | 128 | is_used = false; |
| 131 | return true; | 129 | return true; |
| 132 | } | 130 | } |
| @@ -299,21 +297,16 @@ VKFence& VKResourceManager::CommitFence() { | |||
| 299 | return *found_fence; | 297 | return *found_fence; |
| 300 | } | 298 | } |
| 301 | 299 | ||
| 302 | vk::CommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { | 300 | VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { |
| 303 | return command_buffer_pool->Commit(fence); | 301 | return command_buffer_pool->Commit(fence); |
| 304 | } | 302 | } |
| 305 | 303 | ||
| 306 | void VKResourceManager::GrowFences(std::size_t new_fences_count) { | 304 | void VKResourceManager::GrowFences(std::size_t new_fences_count) { |
| 307 | const auto dev = device.GetLogical(); | ||
| 308 | const auto& dld = device.GetDispatchLoader(); | ||
| 309 | const vk::FenceCreateInfo fence_ci; | ||
| 310 | |||
| 311 | const std::size_t previous_size = fences.size(); | 305 | const std::size_t previous_size = fences.size(); |
| 312 | fences.resize(previous_size + new_fences_count); | 306 | fences.resize(previous_size + new_fences_count); |
| 313 | 307 | ||
| 314 | std::generate(fences.begin() + previous_size, fences.end(), [&]() { | 308 | std::generate(fences.begin() + previous_size, fences.end(), |
| 315 | return std::make_unique<VKFence>(device, dev.createFenceUnique(fence_ci, nullptr, dld)); | 309 | [this] { return std::make_unique<VKFence>(device); }); |
| 316 | }); | ||
| 317 | } | 310 | } |
| 318 | 311 | ||
| 319 | } // namespace Vulkan | 312 | } // namespace Vulkan |
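(Editorial note: the fence changes reduce to three raw API calls — wait, query status, reset. A self-contained sketch of the round-trip that `VKFence::Tick` performs follows; it is illustrative only, since the real code goes through the `vk::Fence` wrapper and additionally releases the resources the fence protects.)

```cpp
#include <cstdint>
#include <limits>
#include <vulkan/vulkan.h>

// Returns true when the fence was signaled and has been reset for reuse.
bool TryRecycleFence(VkDevice device, VkFence fence, bool gpu_wait) {
    if (gpu_wait) {
        // Block until the GPU signals the fence, as Tick does on demand.
        static constexpr uint64_t timeout = std::numeric_limits<uint64_t>::max();
        vkWaitForFences(device, 1, &fence, VK_TRUE, timeout);
    } else if (vkGetFenceStatus(device, fence) != VK_SUCCESS) {
        return false; // Still in flight; try again on the next tick.
    }
    // Signaled: reset so the fence can guard the next submission.
    vkResetFences(device, 1, &fence);
    return true;
}
```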
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h index d4cbc95a5..f683d2276 100644 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ b/src/video_core/renderer_vulkan/vk_resource_manager.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | #include "video_core/renderer_vulkan/declarations.h" | 10 | #include "video_core/renderer_vulkan/wrapper.h" |
| 11 | 11 | ||
| 12 | namespace Vulkan { | 12 | namespace Vulkan { |
| 13 | 13 | ||
| @@ -42,7 +42,7 @@ class VKFence { | |||
| 42 | friend class VKResourceManager; | 42 | friend class VKResourceManager; |
| 43 | 43 | ||
| 44 | public: | 44 | public: |
| 45 | explicit VKFence(const VKDevice& device, UniqueFence handle); | 45 | explicit VKFence(const VKDevice& device); |
| 46 | ~VKFence(); | 46 | ~VKFence(); |
| 47 | 47 | ||
| 48 | /** | 48 | /** |
| @@ -69,7 +69,7 @@ public: | |||
| 69 | void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept; | 69 | void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept; |
| 70 | 70 | ||
| 71 | /// Retrieves the fence. | 71 | /// Retrieves the fence. |
| 72 | operator vk::Fence() const { | 72 | operator VkFence() const { |
| 73 | return *handle; | 73 | return *handle; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| @@ -87,7 +87,7 @@ private: | |||
| 87 | bool Tick(bool gpu_wait, bool owner_wait); | 87 | bool Tick(bool gpu_wait, bool owner_wait); |
| 88 | 88 | ||
| 89 | const VKDevice& device; ///< Device handler | 89 | const VKDevice& device; ///< Device handler |
| 90 | UniqueFence handle; ///< Vulkan fence | 90 | vk::Fence handle; ///< Vulkan fence |
| 91 | std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence | 91 | std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence |
| 92 | bool is_owned = false; ///< The fence has been committed but not released yet. | 92 | bool is_owned = false; ///< The fence has been committed but not released yet. |
| 93 | bool is_used = false; ///< The fence has been committed but has not yet been checked to be free. | 93 | bool is_used = false; ///< The fence has been committed but has not yet been checked to be free. |
| @@ -181,7 +181,7 @@ public: | |||
| 181 | VKFence& CommitFence(); | 181 | VKFence& CommitFence(); |
| 182 | 182 | ||
| 183 | /// Commits an unused command buffer and protects it with a fence. | 183 | /// Commits an unused command buffer and protects it with a fence. |
| 184 | vk::CommandBuffer CommitCommandBuffer(VKFence& fence); | 184 | VkCommandBuffer CommitCommandBuffer(VKFence& fence); |
| 185 | 185 | ||
| 186 | private: | 186 | private: |
| 187 | /// Allocates new fences. | 187 | /// Allocates new fences. |
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 204b7c39c..07bbcf520 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp | |||
| @@ -7,64 +7,64 @@ | |||
| 7 | #include <unordered_map> | 7 | #include <unordered_map> |
| 8 | 8 | ||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 10 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 12 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | 11 | #include "video_core/renderer_vulkan/vk_sampler_cache.h" |
| 12 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 13 | #include "video_core/textures/texture.h" | 13 | #include "video_core/textures/texture.h" |
| 14 | 14 | ||
| 15 | namespace Vulkan { | 15 | namespace Vulkan { |
| 16 | 16 | ||
| 17 | static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> color) { | 17 | namespace { |
| 18 | |||
| 19 | VkBorderColor ConvertBorderColor(std::array<float, 4> color) { | ||
| 18 | // TODO(Rodrigo): Manage integer border colors | 20 | // TODO(Rodrigo): Manage integer border colors |
| 19 | if (color == std::array<float, 4>{0, 0, 0, 0}) { | 21 | if (color == std::array<float, 4>{0, 0, 0, 0}) { |
| 20 | return vk::BorderColor::eFloatTransparentBlack; | 22 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; |
| 21 | } else if (color == std::array<float, 4>{0, 0, 0, 1}) { | 23 | } else if (color == std::array<float, 4>{0, 0, 0, 1}) { |
| 22 | return vk::BorderColor::eFloatOpaqueBlack; | 24 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; |
| 23 | } else if (color == std::array<float, 4>{1, 1, 1, 1}) { | 25 | } else if (color == std::array<float, 4>{1, 1, 1, 1}) { |
| 24 | return vk::BorderColor::eFloatOpaqueWhite; | 26 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; |
| 27 | } | ||
| 28 | if (color[0] + color[1] + color[2] > 1.35f) { | ||
| 29 | // If color elements are brighter than roughly 0.5 average, use white border | ||
| 30 | return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; | ||
| 31 | } else if (color[3] > 0.5f) { | ||
| 32 | return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; | ||
| 25 | } else { | 33 | } else { |
| 26 | if (color[0] + color[1] + color[2] > 1.35f) { | 34 | return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; |
| 27 | // If color elements are brighter than roughly 0.5 average, use white border | ||
| 28 | return vk::BorderColor::eFloatOpaqueWhite; | ||
| 29 | } | ||
| 30 | if (color[3] > 0.5f) { | ||
| 31 | return vk::BorderColor::eFloatOpaqueBlack; | ||
| 32 | } | ||
| 33 | return vk::BorderColor::eFloatTransparentBlack; | ||
| 34 | } | 35 | } |
| 35 | } | 36 | } |
| 36 | 37 | ||
| 38 | } // Anonymous namespace | ||
| 39 | |||
| 37 | VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} | 40 | VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {} |
| 38 | 41 | ||
| 39 | VKSamplerCache::~VKSamplerCache() = default; | 42 | VKSamplerCache::~VKSamplerCache() = default; |
| 40 | 43 | ||
| 41 | UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { | 44 | vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const { |
| 42 | const float max_anisotropy{tsc.GetMaxAnisotropy()}; | 45 | VkSamplerCreateInfo ci; |
| 43 | const bool has_anisotropy{max_anisotropy > 1.0f}; | 46 | ci.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; |
| 44 | 47 | ci.pNext = nullptr; | |
| 45 | const auto border_color{tsc.GetBorderColor()}; | 48 | ci.flags = 0; |
| 46 | const auto vk_border_color{TryConvertBorderColor(border_color)}; | 49 | ci.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter); |
| 47 | 50 | ci.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter); | |
| 48 | constexpr bool unnormalized_coords{false}; | 51 | ci.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter); |
| 49 | 52 | ci.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter); | |
| 50 | const vk::SamplerCreateInfo sampler_ci( | 53 | ci.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter); |
| 51 | {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), | 54 | ci.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter); |
| 52 | MaxwellToVK::Sampler::Filter(tsc.min_filter), | 55 | ci.mipLodBias = tsc.GetLodBias(); |
| 53 | MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), | 56 | ci.anisotropyEnable = tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE; |
| 54 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), | 57 | ci.maxAnisotropy = tsc.GetMaxAnisotropy(); |
| 55 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), | 58 | ci.compareEnable = tsc.depth_compare_enabled; |
| 56 | MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), | 59 | ci.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func); |
| 57 | has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, | 60 | ci.minLod = tsc.GetMinLod(); |
| 58 | MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), | 61 | ci.maxLod = tsc.GetMaxLod(); |
| 59 | tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), | 62 | ci.borderColor = ConvertBorderColor(tsc.GetBorderColor()); |
| 60 | unnormalized_coords); | 63 | ci.unnormalizedCoordinates = VK_FALSE; |
| 61 | 64 | return device.GetLogical().CreateSampler(ci); | |
| 62 | const auto& dld{device.GetDispatchLoader()}; | ||
| 63 | const auto dev{device.GetLogical()}; | ||
| 64 | return dev.createSamplerUnique(sampler_ci, nullptr, dld); | ||
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const { | 67 | VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const { |
| 68 | return *sampler; | 68 | return *sampler; |
| 69 | } | 69 | } |
| 70 | 70 | ||
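(Editorial note: the rewritten `ConvertBorderColor` turns the old optional-returning helper into a total function — the three exact matches keep their canonical constants, and everything else goes through a brightness heuristic where an RGB sum above 1.35, roughly a 0.45 average, picks white, an alpha above 0.5 picks opaque black, and the rest picks transparent black. A few spot checks of that fallback follow, with the heuristic restated so the snippet stands alone.)

```cpp
#include <array>
#include <cassert>
#include <vulkan/vulkan.h>

// Fallback branch of ConvertBorderColor, restated for isolated testing.
static VkBorderColor BorderColorHeuristic(std::array<float, 4> color) {
    if (color[0] + color[1] + color[2] > 1.35f) {
        return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; // bright -> white border
    }
    if (color[3] > 0.5f) {
        return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; // dark but opaque
    }
    return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; // dark and translucent
}

int main() {
    assert(BorderColorHeuristic({0.6f, 0.6f, 0.6f, 1.0f}) == VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE);
    assert(BorderColorHeuristic({0.1f, 0.1f, 0.1f, 0.9f}) == VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK);
    assert(BorderColorHeuristic({0.1f, 0.1f, 0.1f, 0.2f}) ==
           VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK);
    return 0;
}
```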
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h index 1f73b716b..a33d1c0ee 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.h +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "video_core/renderer_vulkan/declarations.h" | 7 | #include "video_core/renderer_vulkan/wrapper.h" |
| 8 | #include "video_core/sampler_cache.h" | 8 | #include "video_core/sampler_cache.h" |
| 9 | #include "video_core/textures/texture.h" | 9 | #include "video_core/textures/texture.h" |
| 10 | 10 | ||
| @@ -12,15 +12,15 @@ namespace Vulkan { | |||
| 12 | 12 | ||
| 13 | class VKDevice; | 13 | class VKDevice; |
| 14 | 14 | ||
| 15 | class VKSamplerCache final : public VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> { | 15 | class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> { |
| 16 | public: | 16 | public: |
| 17 | explicit VKSamplerCache(const VKDevice& device); | 17 | explicit VKSamplerCache(const VKDevice& device); |
| 18 | ~VKSamplerCache(); | 18 | ~VKSamplerCache(); |
| 19 | 19 | ||
| 20 | protected: | 20 | protected: |
| 21 | UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; | 21 | vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override; |
| 22 | 22 | ||
| 23 | vk::Sampler ToSamplerType(const UniqueSampler& sampler) const override; | 23 | VkSampler ToSamplerType(const vk::Sampler& sampler) const override; |
| 24 | 24 | ||
| 25 | private: | 25 | private: |
| 26 | const VKDevice& device; | 26 | const VKDevice& device; |
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index b61d4fe63..900f551b3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -10,23 +10,22 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/microprofile.h" | 12 | #include "common/microprofile.h" |
| 13 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_device.h" | 13 | #include "video_core/renderer_vulkan/vk_device.h" |
| 15 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 14 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 15 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 17 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 16 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 18 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 17 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 18 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 19 | 19 | ||
| 20 | namespace Vulkan { | 20 | namespace Vulkan { |
| 21 | 21 | ||
| 22 | MICROPROFILE_DECLARE(Vulkan_WaitForWorker); | 22 | MICROPROFILE_DECLARE(Vulkan_WaitForWorker); |
| 23 | 23 | ||
| 24 | void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, | 24 | void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { |
| 25 | const vk::DispatchLoaderDynamic& dld) { | ||
| 26 | auto command = first; | 25 | auto command = first; |
| 27 | while (command != nullptr) { | 26 | while (command != nullptr) { |
| 28 | auto next = command->GetNext(); | 27 | auto next = command->GetNext(); |
| 29 | command->Execute(cmdbuf, dld); | 28 | command->Execute(cmdbuf); |
| 30 | command->~Command(); | 29 | command->~Command(); |
| 31 | command = next; | 30 | command = next; |
| 32 | } | 31 | } |
| @@ -51,7 +50,7 @@ VKScheduler::~VKScheduler() { | |||
| 51 | worker_thread.join(); | 50 | worker_thread.join(); |
| 52 | } | 51 | } |
| 53 | 52 | ||
| 54 | void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { | 53 | void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) { |
| 55 | SubmitExecution(semaphore); | 54 | SubmitExecution(semaphore); |
| 56 | if (release_fence) { | 55 | if (release_fence) { |
| 57 | current_fence->Release(); | 56 | current_fence->Release(); |
| @@ -59,7 +58,7 @@ void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) { | |||
| 59 | AllocateNewContext(); | 58 | AllocateNewContext(); |
| 60 | } | 59 | } |
| 61 | 60 | ||
| 62 | void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) { | 61 | void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) { |
| 63 | SubmitExecution(semaphore); | 62 | SubmitExecution(semaphore); |
| 64 | current_fence->Wait(); | 63 | current_fence->Wait(); |
| 65 | if (release_fence) { | 64 | if (release_fence) { |
| @@ -89,17 +88,34 @@ void VKScheduler::DispatchWork() { | |||
| 89 | AcquireNewChunk(); | 88 | AcquireNewChunk(); |
| 90 | } | 89 | } |
| 91 | 90 | ||
| 92 | void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) { | 91 | void VKScheduler::RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, |
| 93 | if (state.renderpass && renderpass_bi == *state.renderpass) { | 92 | VkExtent2D render_area) { |
| 93 | if (renderpass == state.renderpass && framebuffer == state.framebuffer && | ||
| 94 | render_area.width == state.render_area.width && | ||
| 95 | render_area.height == state.render_area.height) { | ||
| 94 | return; | 96 | return; |
| 95 | } | 97 | } |
| 96 | const bool end_renderpass = state.renderpass.has_value(); | 98 | const bool end_renderpass = state.renderpass != nullptr; |
| 97 | state.renderpass = renderpass_bi; | 99 | state.renderpass = renderpass; |
| 98 | Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) { | 100 | state.framebuffer = framebuffer; |
| 101 | state.render_area = render_area; | ||
| 102 | |||
| 103 | VkRenderPassBeginInfo renderpass_bi; | ||
| 104 | renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; | ||
| 105 | renderpass_bi.pNext = nullptr; | ||
| 106 | renderpass_bi.renderPass = renderpass; | ||
| 107 | renderpass_bi.framebuffer = framebuffer; | ||
| 108 | renderpass_bi.renderArea.offset.x = 0; | ||
| 109 | renderpass_bi.renderArea.offset.y = 0; | ||
| 110 | renderpass_bi.renderArea.extent = render_area; | ||
| 111 | renderpass_bi.clearValueCount = 0; | ||
| 112 | renderpass_bi.pClearValues = nullptr; | ||
| 113 | |||
| 114 | Record([renderpass_bi, end_renderpass](vk::CommandBuffer cmdbuf) { | ||
| 99 | if (end_renderpass) { | 115 | if (end_renderpass) { |
| 100 | cmdbuf.endRenderPass(dld); | 116 | cmdbuf.EndRenderPass(); |
| 101 | } | 117 | } |
| 102 | cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); | 118 | cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); |
| 103 | }); | 119 | }); |
| 104 | } | 120 | } |
| 105 | 121 | ||
| @@ -107,13 +123,13 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { | |||
| 107 | EndRenderPass(); | 123 | EndRenderPass(); |
| 108 | } | 124 | } |
| 109 | 125 | ||
| 110 | void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) { | 126 | void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { |
| 111 | if (state.graphics_pipeline == pipeline) { | 127 | if (state.graphics_pipeline == pipeline) { |
| 112 | return; | 128 | return; |
| 113 | } | 129 | } |
| 114 | state.graphics_pipeline = pipeline; | 130 | state.graphics_pipeline = pipeline; |
| 115 | Record([pipeline](auto cmdbuf, auto& dld) { | 131 | Record([pipeline](vk::CommandBuffer cmdbuf) { |
| 116 | cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); | 132 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); |
| 117 | }); | 133 | }); |
| 118 | } | 134 | } |
| 119 | 135 | ||
| @@ -126,37 +142,50 @@ void VKScheduler::WorkerThread() { | |||
| 126 | } | 142 | } |
| 127 | auto extracted_chunk = std::move(chunk_queue.Front()); | 143 | auto extracted_chunk = std::move(chunk_queue.Front()); |
| 128 | chunk_queue.Pop(); | 144 | chunk_queue.Pop(); |
| 129 | extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader()); | 145 | extracted_chunk->ExecuteAll(current_cmdbuf); |
| 130 | chunk_reserve.Push(std::move(extracted_chunk)); | 146 | chunk_reserve.Push(std::move(extracted_chunk)); |
| 131 | } while (!quit); | 147 | } while (!quit); |
| 132 | } | 148 | } |
| 133 | 149 | ||
| 134 | void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { | 150 | void VKScheduler::SubmitExecution(VkSemaphore semaphore) { |
| 135 | EndPendingOperations(); | 151 | EndPendingOperations(); |
| 136 | InvalidateState(); | 152 | InvalidateState(); |
| 137 | WaitWorker(); | 153 | WaitWorker(); |
| 138 | 154 | ||
| 139 | std::unique_lock lock{mutex}; | 155 | std::unique_lock lock{mutex}; |
| 140 | 156 | ||
| 141 | const auto queue = device.GetGraphicsQueue(); | 157 | current_cmdbuf.End(); |
| 142 | const auto& dld = device.GetDispatchLoader(); | ||
| 143 | current_cmdbuf.end(dld); | ||
| 144 | 158 | ||
| 145 | const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, ¤t_cmdbuf, semaphore ? 1U : 0U, | 159 | VkSubmitInfo submit_info; |
| 146 | &semaphore); | 160 | submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; |
| 147 | queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld); | 161 | submit_info.pNext = nullptr; |
| 162 | submit_info.waitSemaphoreCount = 0; | ||
| 163 | submit_info.pWaitSemaphores = nullptr; | ||
| 164 | submit_info.pWaitDstStageMask = nullptr; | ||
| 165 | submit_info.commandBufferCount = 1; | ||
| 166 | submit_info.pCommandBuffers = current_cmdbuf.address(); | ||
| 167 | submit_info.signalSemaphoreCount = semaphore ? 1 : 0; | ||
| 168 | submit_info.pSignalSemaphores = &semaphore; | ||
| 169 | device.GetGraphicsQueue().Submit(submit_info, *current_fence); | ||
| 148 | } | 170 | } |
| 149 | 171 | ||
| 150 | void VKScheduler::AllocateNewContext() { | 172 | void VKScheduler::AllocateNewContext() { |
| 151 | ++ticks; | 173 | ++ticks; |
| 152 | 174 | ||
| 175 | VkCommandBufferBeginInfo cmdbuf_bi; | ||
| 176 | cmdbuf_bi.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; | ||
| 177 | cmdbuf_bi.pNext = nullptr; | ||
| 178 | cmdbuf_bi.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; | ||
| 179 | cmdbuf_bi.pInheritanceInfo = nullptr; | ||
| 180 | |||
| 153 | std::unique_lock lock{mutex}; | 181 | std::unique_lock lock{mutex}; |
| 154 | current_fence = next_fence; | 182 | current_fence = next_fence; |
| 155 | next_fence = &resource_manager.CommitFence(); | 183 | next_fence = &resource_manager.CommitFence(); |
| 156 | 184 | ||
| 157 | current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); | 185 | current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), |
| 158 | current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, | 186 | device.GetDispatchLoader()); |
| 159 | device.GetDispatchLoader()); | 187 | current_cmdbuf.Begin(cmdbuf_bi); |
| 188 | |||
| 160 | // Enable counters once again. These are disabled when a command buffer is finished. | 189 | // Enable counters once again. These are disabled when a command buffer is finished. |
| 161 | if (query_cache) { | 190 | if (query_cache) { |
| 162 | query_cache->UpdateCounters(); | 191 | query_cache->UpdateCounters(); |
| @@ -177,8 +206,8 @@ void VKScheduler::EndRenderPass() { | |||
| 177 | if (!state.renderpass) { | 206 | if (!state.renderpass) { |
| 178 | return; | 207 | return; |
| 179 | } | 208 | } |
| 180 | state.renderpass = std::nullopt; | 209 | state.renderpass = nullptr; |
| 181 | Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); }); | 210 | Record([](vk::CommandBuffer cmdbuf) { cmdbuf.EndRenderPass(); }); |
| 182 | } | 211 | } |
| 183 | 212 | ||
| 184 | void VKScheduler::AcquireNewChunk() { | 213 | void VKScheduler::AcquireNewChunk() { |
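(Editorial note: `SubmitExecution` now ends the command buffer and fills `VkSubmitInfo` by hand; the only conditional piece is the signal-semaphore count, the pointer being ignored by Vulkan whenever the count is zero. A reduced sketch of that submit path against the raw API follows — the queue, command buffer, fence and optional semaphore are assumed to come from the scheduler's state.)

```cpp
#include <vulkan/vulkan.h>

// One command buffer, optional signal semaphore, fence signaled on completion.
VkResult SubmitOneCommandBuffer(VkQueue queue, VkCommandBuffer cmdbuf, VkFence fence,
                                VkSemaphore signal_semaphore) {
    vkEndCommandBuffer(cmdbuf);

    VkSubmitInfo submit_info{};
    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submit_info.commandBufferCount = 1;
    submit_info.pCommandBuffers = &cmdbuf;
    // pSignalSemaphores is ignored when the count is zero, so passing the
    // address of a null handle is safe here.
    submit_info.signalSemaphoreCount = signal_semaphore != VK_NULL_HANDLE ? 1u : 0u;
    submit_info.pSignalSemaphores = &signal_semaphore;

    return vkQueueSubmit(queue, 1, &submit_info, fence);
}
```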
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index c7cc291c3..82a8adc69 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | #include <utility> | 13 | #include <utility> |
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "common/threadsafe_queue.h" | 15 | #include "common/threadsafe_queue.h" |
| 16 | #include "video_core/renderer_vulkan/declarations.h" | 16 | #include "video_core/renderer_vulkan/wrapper.h" |
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| @@ -49,10 +49,10 @@ public: | |||
| 49 | ~VKScheduler(); | 49 | ~VKScheduler(); |
| 50 | 50 | ||
| 51 | /// Sends the current execution context to the GPU. | 51 | /// Sends the current execution context to the GPU. |
| 52 | void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr); | 52 | void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr); |
| 53 | 53 | ||
| 54 | /// Sends the current execution context to the GPU and waits for it to complete. | 54 | /// Sends the current execution context to the GPU and waits for it to complete. |
| 55 | void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr); | 55 | void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr); |
| 56 | 56 | ||
| 57 | /// Waits for the worker thread to finish executing everything. After this function returns it's | 57 | /// Waits for the worker thread to finish executing everything. After this function returns it's |
| 58 | /// safe to touch worker resources. | 58 | /// safe to touch worker resources. |
| @@ -62,14 +62,15 @@ public: | |||
| 62 | void DispatchWork(); | 62 | void DispatchWork(); |
| 63 | 63 | ||
| 64 | /// Requests to begin a renderpass. | 64 | /// Requests to begin a renderpass. |
| 65 | void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi); | 65 | void RequestRenderpass(VkRenderPass renderpass, VkFramebuffer framebuffer, |
| 66 | VkExtent2D render_area); | ||
| 66 | 67 | ||
| 67 | /// Requests the current execution context to be able to execute operations only allowed outside | 68 | /// Requests the current execution context to be able to execute operations only allowed outside |
| 68 | /// of a renderpass. | 69 | /// of a renderpass. |
| 69 | void RequestOutsideRenderPassOperationContext(); | 70 | void RequestOutsideRenderPassOperationContext(); |
| 70 | 71 | ||
| 71 | /// Binds a pipeline to the current execution context. | 72 | /// Binds a pipeline to the current execution context. |
| 72 | void BindGraphicsPipeline(vk::Pipeline pipeline); | 73 | void BindGraphicsPipeline(VkPipeline pipeline); |
| 73 | 74 | ||
| 74 | /// Assigns the query cache. | 75 | /// Assigns the query cache. |
| 75 | void SetQueryCache(VKQueryCache& query_cache_) { | 76 | void SetQueryCache(VKQueryCache& query_cache_) { |
| @@ -101,8 +102,7 @@ private: | |||
| 101 | public: | 102 | public: |
| 102 | virtual ~Command() = default; | 103 | virtual ~Command() = default; |
| 103 | 104 | ||
| 104 | virtual void Execute(vk::CommandBuffer cmdbuf, | 105 | virtual void Execute(vk::CommandBuffer cmdbuf) const = 0; |
| 105 | const vk::DispatchLoaderDynamic& dld) const = 0; | ||
| 106 | 106 | ||
| 107 | Command* GetNext() const { | 107 | Command* GetNext() const { |
| 108 | return next; | 108 | return next; |
| @@ -125,9 +125,8 @@ private: | |||
| 125 | TypedCommand(TypedCommand&&) = delete; | 125 | TypedCommand(TypedCommand&&) = delete; |
| 126 | TypedCommand& operator=(TypedCommand&&) = delete; | 126 | TypedCommand& operator=(TypedCommand&&) = delete; |
| 127 | 127 | ||
| 128 | void Execute(vk::CommandBuffer cmdbuf, | 128 | void Execute(vk::CommandBuffer cmdbuf) const override { |
| 129 | const vk::DispatchLoaderDynamic& dld) const override { | 129 | command(cmdbuf); |
| 130 | command(cmdbuf, dld); | ||
| 131 | } | 130 | } |
| 132 | 131 | ||
| 133 | private: | 132 | private: |
| @@ -136,7 +135,7 @@ private: | |||
| 136 | 135 | ||
| 137 | class CommandChunk final { | 136 | class CommandChunk final { |
| 138 | public: | 137 | public: |
| 139 | void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld); | 138 | void ExecuteAll(vk::CommandBuffer cmdbuf); |
| 140 | 139 | ||
| 141 | template <typename T> | 140 | template <typename T> |
| 142 | bool Record(T& command) { | 141 | bool Record(T& command) { |
| @@ -175,7 +174,7 @@ private: | |||
| 175 | 174 | ||
| 176 | void WorkerThread(); | 175 | void WorkerThread(); |
| 177 | 176 | ||
| 178 | void SubmitExecution(vk::Semaphore semaphore); | 177 | void SubmitExecution(VkSemaphore semaphore); |
| 179 | 178 | ||
| 180 | void AllocateNewContext(); | 179 | void AllocateNewContext(); |
| 181 | 180 | ||
| @@ -198,8 +197,10 @@ private: | |||
| 198 | VKFence* next_fence = nullptr; | 197 | VKFence* next_fence = nullptr; |
| 199 | 198 | ||
| 200 | struct State { | 199 | struct State { |
| 201 | std::optional<vk::RenderPassBeginInfo> renderpass; | 200 | VkRenderPass renderpass = nullptr; |
| 202 | vk::Pipeline graphics_pipeline; | 201 | VkFramebuffer framebuffer = nullptr; |
| 202 | VkExtent2D render_area = {0, 0}; | ||
| 203 | VkPipeline graphics_pipeline = nullptr; | ||
| 203 | } state; | 204 | } state; |
| 204 | 205 | ||
| 205 | std::unique_ptr<CommandChunk> chunk; | 206 | std::unique_ptr<CommandChunk> chunk; |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 24d3ca08f..aaa138f52 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -35,7 +35,7 @@ namespace { | |||
| 35 | using Sirit::Id; | 35 | using Sirit::Id; |
| 36 | using Tegra::Engines::ShaderType; | 36 | using Tegra::Engines::ShaderType; |
| 37 | using Tegra::Shader::Attribute; | 37 | using Tegra::Shader::Attribute; |
| 38 | using Tegra::Shader::AttributeUse; | 38 | using Tegra::Shader::PixelImap; |
| 39 | using Tegra::Shader::Register; | 39 | using Tegra::Shader::Register; |
| 40 | using namespace VideoCommon::Shader; | 40 | using namespace VideoCommon::Shader; |
| 41 | 41 | ||
| @@ -752,16 +752,16 @@ private: | |||
| 752 | if (stage != ShaderType::Fragment) { | 752 | if (stage != ShaderType::Fragment) { |
| 753 | continue; | 753 | continue; |
| 754 | } | 754 | } |
| 755 | switch (header.ps.GetAttributeUse(location)) { | 755 | switch (header.ps.GetPixelImap(location)) { |
| 756 | case AttributeUse::Constant: | 756 | case PixelImap::Constant: |
| 757 | Decorate(id, spv::Decoration::Flat); | 757 | Decorate(id, spv::Decoration::Flat); |
| 758 | break; | 758 | break; |
| 759 | case AttributeUse::ScreenLinear: | 759 | case PixelImap::Perspective: |
| 760 | Decorate(id, spv::Decoration::NoPerspective); | ||
| 761 | break; | ||
| 762 | case AttributeUse::Perspective: | ||
| 763 | // Default | 760 | // Default |
| 764 | break; | 761 | break; |
| 762 | case PixelImap::ScreenLinear: | ||
| 763 | Decorate(id, spv::Decoration::NoPerspective); | ||
| 764 | break; | ||
| 765 | default: | 765 | default: |
| 766 | UNREACHABLE_MSG("Unused attribute being fetched"); | 766 | UNREACHABLE_MSG("Unused attribute being fetched"); |
| 767 | } | 767 | } |
| @@ -801,7 +801,7 @@ private: | |||
| 801 | if (IsOutputAttributeArray()) { | 801 | if (IsOutputAttributeArray()) { |
| 802 | const u32 num = GetNumOutputVertices(); | 802 | const u32 num = GetNumOutputVertices(); |
| 803 | type = TypeArray(type, Constant(t_uint, num)); | 803 | type = TypeArray(type, Constant(t_uint, num)); |
| 804 | if (device.GetDriverID() != vk::DriverIdKHR::eIntelProprietaryWindows) { | 804 | if (device.GetDriverID() != VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR) { |
| 805 | // Intel's proprietary driver fails to setup defaults for arrayed output | 805 | // Intel's proprietary driver fails to setup defaults for arrayed output |
| 806 | // attributes. | 806 | // attributes. |
| 807 | varying_default = ConstantComposite(type, std::vector(num, varying_default)); | 807 | varying_default = ConstantComposite(type, std::vector(num, varying_default)); |
| @@ -1145,9 +1145,6 @@ private: | |||
| 1145 | switch (attribute) { | 1145 | switch (attribute) { |
| 1146 | case Attribute::Index::Position: { | 1146 | case Attribute::Index::Position: { |
| 1147 | if (stage == ShaderType::Fragment) { | 1147 | if (stage == ShaderType::Fragment) { |
| 1148 | if (element == 3) { | ||
| 1149 | return {Constant(t_float, 1.0f), Type::Float}; | ||
| 1150 | } | ||
| 1151 | return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), | 1148 | return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), |
| 1152 | Type::Float}; | 1149 | Type::Float}; |
| 1153 | } | 1150 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index b97c4cb3d..784839327 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp | |||
| @@ -8,27 +8,25 @@ | |||
| 8 | #include "common/alignment.h" | 8 | #include "common/alignment.h" |
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_device.h" | 11 | #include "video_core/renderer_vulkan/vk_device.h" |
| 13 | #include "video_core/renderer_vulkan/vk_shader_util.h" | 12 | #include "video_core/renderer_vulkan/vk_shader_util.h" |
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 14 | 14 | ||
| 15 | namespace Vulkan { | 15 | namespace Vulkan { |
| 16 | 16 | ||
| 17 | UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { | 17 | vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data) { |
| 18 | // Avoid undefined behavior by copying to a staging allocation | 18 | // Avoid undefined behavior by copying to a staging allocation |
| 19 | ASSERT(code_size % sizeof(u32) == 0); | 19 | ASSERT(code_size % sizeof(u32) == 0); |
| 20 | const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); | 20 | const auto data = std::make_unique<u32[]>(code_size / sizeof(u32)); |
| 21 | std::memcpy(data.get(), code_data, code_size); | 21 | std::memcpy(data.get(), code_data, code_size); |
| 22 | 22 | ||
| 23 | const auto dev = device.GetLogical(); | 23 | VkShaderModuleCreateInfo ci; |
| 24 | const auto& dld = device.GetDispatchLoader(); | 24 | ci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; |
| 25 | const vk::ShaderModuleCreateInfo shader_ci({}, code_size, data.get()); | 25 | ci.pNext = nullptr; |
| 26 | vk::ShaderModule shader_module; | 26 | ci.flags = 0; |
| 27 | if (dev.createShaderModule(&shader_ci, nullptr, &shader_module, dld) != vk::Result::eSuccess) { | 27 | ci.codeSize = code_size; |
| 28 | UNREACHABLE_MSG("Shader module failed to build!"); | 28 | ci.pCode = data.get(); |
| 29 | } | 29 | return device.GetLogical().CreateShaderModule(ci); |
| 30 | |||
| 31 | return UniqueShaderModule(shader_module, vk::ObjectDestroy(dev, nullptr, dld)); | ||
| 32 | } | 30 | } |
| 33 | 31 | ||
| 34 | } // namespace Vulkan | 32 | } // namespace Vulkan |
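Annotation: the copy into a std::make_unique<u32[]> allocation is not busywork. VkShaderModuleCreateInfo::pCode is a const uint32_t*, and reinterpreting an arbitrary byte pointer as u32* violates alignment rules; memcpy into a properly aligned allocation sidesteps the undefined behavior. A self-contained sketch of the idea:

    #include <cstdint>
    #include <cstring>
    #include <memory>

    // SPIR-V words are 32-bit, but the incoming blob is a byte pointer with no
    // alignment guarantee. memcpy is alignment-safe; a reinterpret_cast is not.
    std::unique_ptr<uint32_t[]> AlignSpirv(const uint8_t* code_data, std::size_t code_size) {
        auto data = std::make_unique<uint32_t[]>(code_size / sizeof(uint32_t));
        std::memcpy(data.get(), code_data, code_size);
        return data;
    }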
diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index c06d65970..be38d6697 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h | |||
| @@ -6,12 +6,12 @@ | |||
| 6 | 6 | ||
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/renderer_vulkan/declarations.h" | 9 | #include "video_core/renderer_vulkan/wrapper.h" |
| 10 | 10 | ||
| 11 | namespace Vulkan { | 11 | namespace Vulkan { |
| 12 | 12 | ||
| 13 | class VKDevice; | 13 | class VKDevice; |
| 14 | 14 | ||
| 15 | UniqueShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); | 15 | vk::ShaderModule BuildShader(const VKDevice& device, std::size_t code_size, const u8* code_data); |
| 16 | 16 | ||
| 17 | } // namespace Vulkan | 17 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 374959f82..94d954d7a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 13 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 15 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 15 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 16 | 17 | ||
| 17 | namespace Vulkan { | 18 | namespace Vulkan { |
| 18 | 19 | ||
| @@ -71,17 +72,23 @@ VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_ | |||
| 71 | } | 72 | } |
| 72 | 73 | ||
| 73 | VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { | 74 | VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { |
| 74 | const auto usage = | ||
| 75 | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | | ||
| 76 | vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | | ||
| 77 | vk::BufferUsageFlagBits::eIndexBuffer; | ||
| 78 | const u32 log2 = Common::Log2Ceil64(size); | 75 | const u32 log2 = Common::Log2Ceil64(size); |
| 79 | const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, | 76 | |
| 80 | nullptr); | 77 | VkBufferCreateInfo ci; |
| 81 | const auto dev = device.GetLogical(); | 78 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| 79 | ci.pNext = nullptr; | ||
| 80 | ci.flags = 0; | ||
| 81 | ci.size = 1ULL << log2; | ||
| 82 | ci.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||
| 83 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | ||
| 84 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; | ||
| 85 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; | ||
| 86 | ci.queueFamilyIndexCount = 0; | ||
| 87 | ci.pQueueFamilyIndices = nullptr; | ||
| 88 | |||
| 82 | auto buffer = std::make_unique<VKBuffer>(); | 89 | auto buffer = std::make_unique<VKBuffer>(); |
| 83 | buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader()); | 90 | buffer->handle = device.GetLogical().CreateBuffer(ci); |
| 84 | buffer->commit = memory_manager.Commit(*buffer->handle, host_visible); | 91 | buffer->commit = memory_manager.Commit(buffer->handle, host_visible); |
| 85 | 92 | ||
| 86 | auto& entries = GetCache(host_visible)[log2].entries; | 93 | auto& entries = GetCache(host_visible)[log2].entries; |
| 87 | return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer; | 94 | return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer; |
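Annotation: the pool keys its caches by Common::Log2Ceil64(size), so every allocation is rounded up to the next power of two and later requests of similar size can reuse an existing buffer. A self-contained sketch of the arithmetic, with Log2Ceil64 reimplemented here purely for illustration:

    #include <cstddef>
    #include <cstdint>

    // Smallest log2 such that (1 << log2) >= value.
    uint32_t Log2Ceil64(uint64_t value) {
        uint32_t log2 = 0;
        while ((uint64_t{1} << log2) < value) {
            ++log2;
        }
        return log2;
    }

    // Bucket size actually allocated for a request of `size` bytes.
    uint64_t BucketSize(std::size_t size) {
        return uint64_t{1} << Log2Ceil64(size);
    }
    // BucketSize(100)  == 128
    // BucketSize(4096) == 4096
    // BucketSize(4097) == 8192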
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 4d9488f49..a0840ff8c 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -11,9 +11,9 @@ | |||
| 11 | 11 | ||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | 13 | ||
| 14 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 14 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 15 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| @@ -22,7 +22,7 @@ class VKFenceWatch; | |||
| 22 | class VKScheduler; | 22 | class VKScheduler; |
| 23 | 23 | ||
| 24 | struct VKBuffer final { | 24 | struct VKBuffer final { |
| 25 | UniqueBuffer handle; | 25 | vk::Buffer handle; |
| 26 | VKMemoryCommit commit; | 26 | VKMemoryCommit commit; |
| 27 | }; | 27 | }; |
| 28 | 28 | ||
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index d48d3b44c..38a93a01a 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp | |||
| @@ -9,11 +9,11 @@ | |||
| 9 | 9 | ||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 13 | #include "video_core/renderer_vulkan/vk_device.h" | 12 | #include "video_core/renderer_vulkan/vk_device.h" |
| 14 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 13 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 16 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 15 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| 16 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| @@ -25,8 +25,8 @@ constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; | |||
| 25 | constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; | 25 | constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; |
| 26 | 26 | ||
| 27 | std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, | 27 | std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, |
| 28 | vk::MemoryPropertyFlags wanted) { | 28 | VkMemoryPropertyFlags wanted) { |
| 29 | const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); | 29 | const auto properties = device.GetPhysical().GetMemoryProperties(); |
| 30 | for (u32 i = 0; i < properties.memoryTypeCount; i++) { | 30 | for (u32 i = 0; i < properties.memoryTypeCount; i++) { |
| 31 | if (!(filter & (1 << i))) { | 31 | if (!(filter & (1 << i))) { |
| 32 | continue; | 32 | continue; |
| @@ -35,13 +35,13 @@ std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, | |||
| 35 | return i; | 35 | return i; |
| 36 | } | 36 | } |
| 37 | } | 37 | } |
| 38 | return {}; | 38 | return std::nullopt; |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | } // Anonymous namespace | 41 | } // Anonymous namespace |
| 42 | 42 | ||
| 43 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | 43 | VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, |
| 44 | vk::BufferUsageFlags usage) | 44 | VkBufferUsageFlags usage) |
| 45 | : device{device}, scheduler{scheduler} { | 45 | : device{device}, scheduler{scheduler} { |
| 46 | CreateBuffers(usage); | 46 | CreateBuffers(usage); |
| 47 | ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); | 47 | ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); |
| @@ -78,17 +78,13 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { | |||
| 78 | invalidated = true; | 78 | invalidated = true; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | const auto dev = device.GetLogical(); | 81 | return {memory.Map(offset, size), offset, invalidated}; |
| 82 | const auto& dld = device.GetDispatchLoader(); | ||
| 83 | const auto pointer = reinterpret_cast<u8*>(dev.mapMemory(*memory, offset, size, {}, dld)); | ||
| 84 | return {pointer, offset, invalidated}; | ||
| 85 | } | 82 | } |
| 86 | 83 | ||
| 87 | void VKStreamBuffer::Unmap(u64 size) { | 84 | void VKStreamBuffer::Unmap(u64 size) { |
| 88 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); | 85 | ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); |
| 89 | 86 | ||
| 90 | const auto dev = device.GetLogical(); | 87 | memory.Unmap(); |
| 91 | dev.unmapMemory(*memory, device.GetDispatchLoader()); | ||
| 92 | 88 | ||
| 93 | offset += size; | 89 | offset += size; |
| 94 | 90 | ||
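Annotation: Map and Unmap implement a classic ring buffer over the fixed 256 MiB stream buffer: the write cursor is aligned up, and a request that would overflow wraps to offset zero and reports an invalidation so callers re-upload. A simplified sketch of that arithmetic; the real code additionally waits on fence watches before reusing wrapped space:

    #include <cstdint>

    struct MapResult {
        uint64_t offset;
        bool invalidated;
    };

    MapResult RingMap(uint64_t& cursor, uint64_t buffer_size, uint64_t size, uint64_t alignment) {
        uint64_t offset = (cursor + alignment - 1) / alignment * alignment; // Align up.
        bool invalidated = false;
        if (offset + size > buffer_size) {
            offset = 0; // Wrap around; previously written regions are superseded.
            invalidated = true;
        }
        cursor = offset; // The matching Unmap(size) later advances cursor by `size`.
        return {offset, invalidated};
    }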
| @@ -101,30 +97,42 @@ void VKStreamBuffer::Unmap(u64 size) { | |||
| 101 | watch.fence.Watch(scheduler.GetFence()); | 97 | watch.fence.Watch(scheduler.GetFence()); |
| 102 | } | 98 | } |
| 103 | 99 | ||
| 104 | void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { | 100 | void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { |
| 105 | const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, | 101 | VkBufferCreateInfo buffer_ci; |
| 106 | 0, nullptr); | 102 | buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| 107 | const auto dev = device.GetLogical(); | 103 | buffer_ci.pNext = nullptr; |
| 108 | const auto& dld = device.GetDispatchLoader(); | 104 | buffer_ci.flags = 0; |
| 109 | buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); | 105 | buffer_ci.size = STREAM_BUFFER_SIZE; |
| 106 | buffer_ci.usage = usage; | ||
| 107 | buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; | ||
| 108 | buffer_ci.queueFamilyIndexCount = 0; | ||
| 109 | buffer_ci.pQueueFamilyIndices = nullptr; | ||
| 110 | |||
| 111 | const auto& dev = device.GetLogical(); | ||
| 112 | buffer = dev.CreateBuffer(buffer_ci); | ||
| 110 | 113 | ||
| 111 | const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); | 114 | const auto& dld = device.GetDispatchLoader(); |
| 115 | const auto requirements = dev.GetBufferMemoryRequirements(*buffer); | ||
| 112 | // Prefer device local host visible allocations (this should hit AMD's pinned memory). | 116 | // Prefer device local host visible allocations (this should hit AMD's pinned memory). |
| 113 | auto type = FindMemoryType(device, requirements.memoryTypeBits, | 117 | auto type = |
| 114 | vk::MemoryPropertyFlagBits::eHostVisible | | 118 | FindMemoryType(device, requirements.memoryTypeBits, |
| 115 | vk::MemoryPropertyFlagBits::eHostCoherent | | 119 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | |
| 116 | vk::MemoryPropertyFlagBits::eDeviceLocal); | 120 | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); |
| 117 | if (!type) { | 121 | if (!type) { |
| 118 | // Otherwise search for a host visible allocation. | 122 | // Otherwise search for a host visible allocation. |
| 119 | type = FindMemoryType(device, requirements.memoryTypeBits, | 123 | type = FindMemoryType(device, requirements.memoryTypeBits, |
| 120 | vk::MemoryPropertyFlagBits::eHostVisible | | 124 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | |
| 121 | vk::MemoryPropertyFlagBits::eHostCoherent); | 125 | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); |
| 122 | ASSERT_MSG(type, "No host visible and coherent memory type found"); | 126 | ASSERT_MSG(type, "No host visible and coherent memory type found"); |
| 123 | } | 127 | } |
| 124 | const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); | 128 | VkMemoryAllocateInfo memory_ai; |
| 125 | memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); | 129 | memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; |
| 126 | 130 | memory_ai.pNext = nullptr; | |
| 127 | dev.bindBufferMemory(*buffer, *memory, 0, dld); | 131 | memory_ai.allocationSize = requirements.size; |
| 132 | memory_ai.memoryTypeIndex = *type; | ||
| 133 | |||
| 134 | memory = dev.AllocateMemory(memory_ai); | ||
| 135 | buffer.BindMemory(*memory, 0); | ||
| 128 | } | 136 | } |
| 129 | 137 | ||
| 130 | void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { | 138 | void VKStreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) { |
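Annotation: FindMemoryType scans VkMemoryRequirements::memoryTypeBits, where bit i being set means memory type i is legal for the buffer, and CreateBuffers applies it twice: first preferring memory that is both DEVICE_LOCAL and HOST_VISIBLE (which should hit AMD's pinned heap), then falling back to any coherent host-visible type. A sketch of the same policy against the raw Vulkan structs:

    #include <cstdint>
    #include <optional>
    #include <vulkan/vulkan.h>

    std::optional<uint32_t> FindType(const VkPhysicalDeviceMemoryProperties& props,
                                     uint32_t filter, VkMemoryPropertyFlags wanted) {
        for (uint32_t i = 0; i < props.memoryTypeCount; ++i) {
            // Type i must be legal for the resource and carry all wanted flags.
            if ((filter & (1u << i)) && (props.memoryTypes[i].propertyFlags & wanted) == wanted) {
                return i;
            }
        }
        return std::nullopt;
    }

    std::optional<uint32_t> PickStreamBufferType(const VkPhysicalDeviceMemoryProperties& props,
                                                 uint32_t filter) {
        constexpr VkMemoryPropertyFlags host =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
        if (const auto type = FindType(props, filter, host | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
            return type; // Device-local yet CPU-mappable: the fast path.
        }
        return FindType(props, filter, host); // Fallback: any coherent host-visible type.
    }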
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 187c0c612..58ce8b973 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/renderer_vulkan/declarations.h" | 12 | #include "video_core/renderer_vulkan/wrapper.h" |
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| @@ -21,7 +21,7 @@ class VKScheduler; | |||
| 21 | class VKStreamBuffer final { | 21 | class VKStreamBuffer final { |
| 22 | public: | 22 | public: |
| 23 | explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, | 23 | explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, |
| 24 | vk::BufferUsageFlags usage); | 24 | VkBufferUsageFlags usage); |
| 25 | ~VKStreamBuffer(); | 25 | ~VKStreamBuffer(); |
| 26 | 26 | ||
| 27 | /** | 27 | /** |
| @@ -35,7 +35,7 @@ public: | |||
| 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 36 | void Unmap(u64 size); | 36 | void Unmap(u64 size); |
| 37 | 37 | ||
| 38 | vk::Buffer GetHandle() const { | 38 | VkBuffer GetHandle() const { |
| 39 | return *buffer; | 39 | return *buffer; |
| 40 | } | 40 | } |
| 41 | 41 | ||
| @@ -46,20 +46,18 @@ private: | |||
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | /// Creates Vulkan buffer handles, committing the required memory. | 48 | /// Creates Vulkan buffer handles, committing the required memory. |
| 49 | void CreateBuffers(vk::BufferUsageFlags usage); | 49 | void CreateBuffers(VkBufferUsageFlags usage); |
| 50 | 50 | ||
| 51 | /// Increases the number of watches available. | 51 | /// Increases the number of watches available. |
| 52 | void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); | 52 | void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size); |
| 53 | 53 | ||
| 54 | void WaitPendingOperations(u64 requested_upper_bound); | 54 | void WaitPendingOperations(u64 requested_upper_bound); |
| 55 | 55 | ||
| 56 | const VKDevice& device; ///< Vulkan device manager. | 56 | const VKDevice& device; ///< Vulkan device manager. |
| 57 | VKScheduler& scheduler; ///< Command scheduler. | 57 | VKScheduler& scheduler; ///< Command scheduler. |
| 58 | const vk::AccessFlags access; ///< Access usage of this stream buffer. | ||
| 59 | const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. | ||
| 60 | 58 | ||
| 61 | UniqueBuffer buffer; ///< Mapped buffer. | 59 | vk::Buffer buffer; ///< Mapped buffer. |
| 62 | UniqueDeviceMemory memory; ///< Memory allocation. | 60 | vk::DeviceMemory memory; ///< Memory allocation. |
| 63 | 61 | ||
| 64 | u64 offset{}; ///< Buffer iterator. | 62 | u64 offset{}; ///< Buffer iterator. |
| 65 | u64 mapped_size{}; ///< Size reserved for the current copy. | 63 | u64 mapped_size{}; ///< Size reserved for the current copy. |
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 9e73fa9cd..bffd8f32a 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -11,69 +11,64 @@ | |||
| 11 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 12 | #include "core/core.h" | 12 | #include "core/core.h" |
| 13 | #include "core/frontend/framebuffer_layout.h" | 13 | #include "core/frontend/framebuffer_layout.h" |
| 14 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_device.h" | 14 | #include "video_core/renderer_vulkan/vk_device.h" |
| 16 | #include "video_core/renderer_vulkan/vk_resource_manager.h" | 15 | #include "video_core/renderer_vulkan/vk_resource_manager.h" |
| 17 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 16 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 17 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 18 | 18 | ||
| 19 | namespace Vulkan { | 19 | namespace Vulkan { |
| 20 | 20 | ||
| 21 | namespace { | 21 | namespace { |
| 22 | 22 | ||
| 23 | vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats, | 23 | VkSurfaceFormatKHR ChooseSwapSurfaceFormat(vk::Span<VkSurfaceFormatKHR> formats, bool srgb) { |
| 24 | bool srgb) { | 24 | if (formats.size() == 1 && formats[0].format == VK_FORMAT_UNDEFINED) { |
| 25 | if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { | 25 | VkSurfaceFormatKHR format; |
| 26 | vk::SurfaceFormatKHR format; | 26 | format.format = VK_FORMAT_B8G8R8A8_UNORM; |
| 27 | format.format = vk::Format::eB8G8R8A8Unorm; | 27 | format.colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; |
| 28 | format.colorSpace = vk::ColorSpaceKHR::eSrgbNonlinear; | ||
| 29 | return format; | 28 | return format; |
| 30 | } | 29 | } |
| 31 | const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { | 30 | const auto& found = std::find_if(formats.begin(), formats.end(), [srgb](const auto& format) { |
| 32 | const auto request_format = srgb ? vk::Format::eB8G8R8A8Srgb : vk::Format::eB8G8R8A8Unorm; | 31 | const auto request_format = srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM; |
| 33 | return format.format == request_format && | 32 | return format.format == request_format && |
| 34 | format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear; | 33 | format.colorSpace == VK_COLOR_SPACE_SRGB_NONLINEAR_KHR; |
| 35 | }); | 34 | }); |
| 36 | return found != formats.end() ? *found : formats[0]; | 35 | return found != formats.end() ? *found : formats[0]; |
| 37 | } | 36 | } |
| 38 | 37 | ||
| 39 | vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) { | 38 | VkPresentModeKHR ChooseSwapPresentMode(vk::Span<VkPresentModeKHR> modes) { |
| 40 | // Mailbox doesn't block the application like FIFO (vsync) does, so prefer it | 39 | // Mailbox doesn't block the application like FIFO (vsync) does, so prefer it |
| 41 | const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) { | 40 | const auto found = std::find(modes.begin(), modes.end(), VK_PRESENT_MODE_MAILBOX_KHR); |
| 42 | return mode == vk::PresentModeKHR::eMailbox; | 41 | return found != modes.end() ? *found : VK_PRESENT_MODE_FIFO_KHR; |
| 43 | }); | ||
| 44 | return found != modes.end() ? *found : vk::PresentModeKHR::eFifo; | ||
| 45 | } | 42 | } |
| 46 | 43 | ||
| 47 | vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, | 44 | VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height) { |
| 48 | u32 height) { | ||
| 49 | constexpr auto undefined_size{std::numeric_limits<u32>::max()}; | 45 | constexpr auto undefined_size{std::numeric_limits<u32>::max()}; |
| 50 | if (capabilities.currentExtent.width != undefined_size) { | 46 | if (capabilities.currentExtent.width != undefined_size) { |
| 51 | return capabilities.currentExtent; | 47 | return capabilities.currentExtent; |
| 52 | } | 48 | } |
| 53 | vk::Extent2D extent = {width, height}; | 49 | VkExtent2D extent; |
| 54 | extent.width = std::max(capabilities.minImageExtent.width, | 50 | extent.width = std::max(capabilities.minImageExtent.width, |
| 55 | std::min(capabilities.maxImageExtent.width, extent.width)); | 51 | std::min(capabilities.maxImageExtent.width, width)); |
| 56 | extent.height = std::max(capabilities.minImageExtent.height, | 52 | extent.height = std::max(capabilities.minImageExtent.height, |
| 57 | std::min(capabilities.maxImageExtent.height, extent.height)); | 53 | std::min(capabilities.maxImageExtent.height, height)); |
| 58 | return extent; | 54 | return extent; |
| 59 | } | 55 | } |
| 60 | 56 | ||
| 61 | } // Anonymous namespace | 57 | } // Anonymous namespace |
| 62 | 58 | ||
| 63 | VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device) | 59 | VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device) |
| 64 | : surface{surface}, device{device} {} | 60 | : surface{surface}, device{device} {} |
| 65 | 61 | ||
| 66 | VKSwapchain::~VKSwapchain() = default; | 62 | VKSwapchain::~VKSwapchain() = default; |
| 67 | 63 | ||
| 68 | void VKSwapchain::Create(u32 width, u32 height, bool srgb) { | 64 | void VKSwapchain::Create(u32 width, u32 height, bool srgb) { |
| 69 | const auto& dld = device.GetDispatchLoader(); | ||
| 70 | const auto physical_device = device.GetPhysical(); | 65 | const auto physical_device = device.GetPhysical(); |
| 71 | const auto capabilities{physical_device.getSurfaceCapabilitiesKHR(surface, dld)}; | 66 | const auto capabilities{physical_device.GetSurfaceCapabilitiesKHR(surface)}; |
| 72 | if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { | 67 | if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { |
| 73 | return; | 68 | return; |
| 74 | } | 69 | } |
| 75 | 70 | ||
| 76 | device.GetLogical().waitIdle(dld); | 71 | device.GetLogical().WaitIdle(); |
| 77 | Destroy(); | 72 | Destroy(); |
| 78 | 73 | ||
| 79 | CreateSwapchain(capabilities, width, height, srgb); | 74 | CreateSwapchain(capabilities, width, height, srgb); |
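Annotation: ChooseSwapExtent encodes a Vulkan rule worth spelling out: a currentExtent of 0xFFFFFFFF means the window system lets the application pick any size within [minImageExtent, maxImageExtent], while any other value must be used verbatim. An equivalent formulation of the same clamping with std::clamp:

    #include <algorithm>
    #include <cstdint>
    #include <vulkan/vulkan.h>

    VkExtent2D ClampExtent(const VkSurfaceCapabilitiesKHR& caps, uint32_t width, uint32_t height) {
        if (caps.currentExtent.width != UINT32_MAX) {
            return caps.currentExtent; // Size is fixed by the window system.
        }
        return {std::clamp(width, caps.minImageExtent.width, caps.maxImageExtent.width),
                std::clamp(height, caps.minImageExtent.height, caps.maxImageExtent.height)};
    }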
| @@ -84,10 +79,8 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { | |||
| 84 | } | 79 | } |
| 85 | 80 | ||
| 86 | void VKSwapchain::AcquireNextImage() { | 81 | void VKSwapchain::AcquireNextImage() { |
| 87 | const auto dev{device.GetLogical()}; | 82 | device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), |
| 88 | const auto& dld{device.GetDispatchLoader()}; | 83 | *present_semaphores[frame_index], {}, &image_index); |
| 89 | dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), | ||
| 90 | *present_semaphores[frame_index], {}, &image_index, dld); | ||
| 91 | 84 | ||
| 92 | if (auto& fence = fences[image_index]; fence) { | 85 | if (auto& fence = fences[image_index]; fence) { |
| 93 | fence->Wait(); | 86 | fence->Wait(); |
| @@ -96,29 +89,37 @@ void VKSwapchain::AcquireNextImage() { | |||
| 96 | } | 89 | } |
| 97 | } | 90 | } |
| 98 | 91 | ||
| 99 | bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) { | 92 | bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { |
| 100 | const vk::Semaphore present_semaphore{*present_semaphores[frame_index]}; | 93 | const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; |
| 101 | const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore}; | 94 | const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; |
| 102 | const u32 wait_semaphore_count{render_semaphore ? 2U : 1U}; | ||
| 103 | const auto& dld{device.GetDispatchLoader()}; | ||
| 104 | const auto present_queue{device.GetPresentQueue()}; | 95 | const auto present_queue{device.GetPresentQueue()}; |
| 105 | bool recreated = false; | 96 | bool recreated = false; |
| 106 | 97 | ||
| 107 | const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1, | 98 | VkPresentInfoKHR present_info; |
| 108 | &swapchain.get(), &image_index, {}); | 99 | present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; |
| 109 | switch (const auto result = present_queue.presentKHR(&present_info, dld); result) { | 100 | present_info.pNext = nullptr; |
| 110 | case vk::Result::eSuccess: | 101 | present_info.waitSemaphoreCount = render_semaphore ? 2U : 1U; |
| 102 | present_info.pWaitSemaphores = semaphores.data(); | ||
| 103 | present_info.swapchainCount = 1; | ||
| 104 | present_info.pSwapchains = swapchain.address(); | ||
| 105 | present_info.pImageIndices = &image_index; | ||
| 106 | present_info.pResults = nullptr; | ||
| 107 | |||
| 108 | switch (const VkResult result = present_queue.Present(present_info)) { | ||
| 109 | case VK_SUCCESS: | ||
| 110 | break; | ||
| 111 | case VK_SUBOPTIMAL_KHR: | ||
| 112 | LOG_DEBUG(Render_Vulkan, "Suboptimal swapchain"); | ||
| 111 | break; | 113 | break; |
| 112 | case vk::Result::eErrorOutOfDateKHR: | 114 | case VK_ERROR_OUT_OF_DATE_KHR: |
| 113 | if (current_width > 0 && current_height > 0) { | 115 | if (current_width > 0 && current_height > 0) { |
| 114 | Create(current_width, current_height, current_srgb); | 116 | Create(current_width, current_height, current_srgb); |
| 115 | recreated = true; | 117 | recreated = true; |
| 116 | } | 118 | } |
| 117 | break; | 119 | break; |
| 118 | default: | 120 | default: |
| 119 | LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!", | 121 | LOG_CRITICAL(Render_Vulkan, "Failed to present with error {}", vk::ToString(result)); |
| 120 | vk::to_string(result)); | 122 | break; |
| 121 | UNREACHABLE(); | ||
| 122 | } | 123 | } |
| 123 | 124 | ||
| 124 | ASSERT(fences[image_index] == nullptr); | 125 | ASSERT(fences[image_index] == nullptr); |
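Annotation: the rewritten switch also changes policy: a failed present no longer trips UNREACHABLE(). A sketch of the contract each result implies for the caller; HandlePresentResult is a hypothetical helper, not part of the patch:

    #include <vulkan/vulkan.h>

    // Returns true when the image was actually presented.
    bool HandlePresentResult(VkResult result, bool& must_recreate) {
        switch (result) {
        case VK_SUCCESS:
            return true;
        case VK_SUBOPTIMAL_KHR:
            // Presented, but the swapchain no longer matches the surface
            // exactly (e.g. after a rotation); keep going, recreate lazily.
            return true;
        case VK_ERROR_OUT_OF_DATE_KHR:
            must_recreate = true; // Surface changed size; nothing was presented.
            return false;
        default:
            return false; // Device loss and friends: handle as fatal upstream.
        }
    }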
| @@ -132,74 +133,92 @@ bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebu | |||
| 132 | return framebuffer.width != current_width || framebuffer.height != current_height; | 133 | return framebuffer.width != current_width || framebuffer.height != current_height; |
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, | 136 | void VKSwapchain::CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, |
| 136 | u32 height, bool srgb) { | 137 | u32 height, bool srgb) { |
| 137 | const auto& dld{device.GetDispatchLoader()}; | ||
| 138 | const auto physical_device{device.GetPhysical()}; | 138 | const auto physical_device{device.GetPhysical()}; |
| 139 | const auto formats{physical_device.getSurfaceFormatsKHR(surface, dld)}; | 139 | const auto formats{physical_device.GetSurfaceFormatsKHR(surface)}; |
| 140 | const auto present_modes{physical_device.getSurfacePresentModesKHR(surface, dld)}; | 140 | const auto present_modes{physical_device.GetSurfacePresentModesKHR(surface)}; |
| 141 | 141 | ||
| 142 | const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; | 142 | const VkSurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats, srgb)}; |
| 143 | const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; | 143 | const VkPresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; |
| 144 | 144 | ||
| 145 | u32 requested_image_count{capabilities.minImageCount + 1}; | 145 | u32 requested_image_count{capabilities.minImageCount + 1}; |
| 146 | if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { | 146 | if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { |
| 147 | requested_image_count = capabilities.maxImageCount; | 147 | requested_image_count = capabilities.maxImageCount; |
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | vk::SwapchainCreateInfoKHR swapchain_ci( | 150 | VkSwapchainCreateInfoKHR swapchain_ci; |
| 151 | {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, {}, 1, | 151 | swapchain_ci.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; |
| 152 | vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, capabilities.currentTransform, | 152 | swapchain_ci.pNext = nullptr; |
| 153 | vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, {}); | 153 | swapchain_ci.flags = 0; |
| 154 | swapchain_ci.surface = surface; | ||
| 155 | swapchain_ci.minImageCount = requested_image_count; | ||
| 156 | swapchain_ci.imageFormat = surface_format.format; | ||
| 157 | swapchain_ci.imageColorSpace = surface_format.colorSpace; | ||
| 158 | swapchain_ci.imageArrayLayers = 1; | ||
| 159 | swapchain_ci.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; | ||
| 160 | swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; | ||
| 161 | swapchain_ci.queueFamilyIndexCount = 0; | ||
| 162 | swapchain_ci.pQueueFamilyIndices = nullptr; | ||
| 163 | swapchain_ci.preTransform = capabilities.currentTransform; | ||
| 164 | swapchain_ci.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; | ||
| 165 | swapchain_ci.presentMode = present_mode; | ||
| 166 | swapchain_ci.clipped = VK_FALSE; | ||
| 167 | swapchain_ci.oldSwapchain = nullptr; | ||
| 154 | 168 | ||
| 155 | const u32 graphics_family{device.GetGraphicsFamily()}; | 169 | const u32 graphics_family{device.GetGraphicsFamily()}; |
| 156 | const u32 present_family{device.GetPresentFamily()}; | 170 | const u32 present_family{device.GetPresentFamily()}; |
| 157 | const std::array<u32, 2> queue_indices{graphics_family, present_family}; | 171 | const std::array<u32, 2> queue_indices{graphics_family, present_family}; |
| 158 | if (graphics_family != present_family) { | 172 | if (graphics_family != present_family) { |
| 159 | swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent; | 173 | swapchain_ci.imageSharingMode = VK_SHARING_MODE_CONCURRENT; |
| 160 | swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); | 174 | swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); |
| 161 | swapchain_ci.pQueueFamilyIndices = queue_indices.data(); | 175 | swapchain_ci.pQueueFamilyIndices = queue_indices.data(); |
| 162 | } else { | 176 | } else { |
| 163 | swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; | 177 | swapchain_ci.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; |
| 164 | } | 178 | } |
| 165 | 179 | ||
| 166 | // Request the size again to reduce the possibility of a TOCTOU race condition. | 180 | // Request the size again to reduce the possibility of a TOCTOU race condition. |
| 167 | const auto updated_capabilities = physical_device.getSurfaceCapabilitiesKHR(surface, dld); | 181 | const auto updated_capabilities = physical_device.GetSurfaceCapabilitiesKHR(surface); |
| 168 | swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); | 182 | swapchain_ci.imageExtent = ChooseSwapExtent(updated_capabilities, width, height); |
| 169 | // Don't add code between this query and the swapchain creation. | 183 | // Don't add code between this query and the swapchain creation. |
| 170 | const auto dev{device.GetLogical()}; | 184 | swapchain = device.GetLogical().CreateSwapchainKHR(swapchain_ci); |
| 171 | swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); | ||
| 172 | 185 | ||
| 173 | extent = swapchain_ci.imageExtent; | 186 | extent = swapchain_ci.imageExtent; |
| 174 | current_width = extent.width; | 187 | current_width = extent.width; |
| 175 | current_height = extent.height; | 188 | current_height = extent.height; |
| 176 | current_srgb = srgb; | 189 | current_srgb = srgb; |
| 177 | 190 | ||
| 178 | images = dev.getSwapchainImagesKHR(*swapchain, dld); | 191 | images = swapchain.GetImages(); |
| 179 | image_count = static_cast<u32>(images.size()); | 192 | image_count = static_cast<u32>(images.size()); |
| 180 | image_format = surface_format.format; | 193 | image_format = surface_format.format; |
| 181 | } | 194 | } |
| 182 | 195 | ||
| 183 | void VKSwapchain::CreateSemaphores() { | 196 | void VKSwapchain::CreateSemaphores() { |
| 184 | const auto dev{device.GetLogical()}; | ||
| 185 | const auto& dld{device.GetDispatchLoader()}; | ||
| 186 | |||
| 187 | present_semaphores.resize(image_count); | 197 | present_semaphores.resize(image_count); |
| 188 | for (std::size_t i = 0; i < image_count; i++) { | 198 | std::generate(present_semaphores.begin(), present_semaphores.end(), |
| 189 | present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); | 199 | [this] { return device.GetLogical().CreateSemaphore(); }); |
| 190 | } | ||
| 191 | } | 200 | } |
| 192 | 201 | ||
| 193 | void VKSwapchain::CreateImageViews() { | 202 | void VKSwapchain::CreateImageViews() { |
| 194 | const auto dev{device.GetLogical()}; | 203 | VkImageViewCreateInfo ci; |
| 195 | const auto& dld{device.GetDispatchLoader()}; | 204 | ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; |
| 205 | ci.pNext = nullptr; | ||
| 206 | ci.flags = 0; | ||
| 207 | // ci.image is set below, once per swapchain image | ||
| 208 | ci.viewType = VK_IMAGE_VIEW_TYPE_2D; | ||
| 209 | ci.format = image_format; | ||
| 210 | ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 211 | VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; | ||
| 212 | ci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; | ||
| 213 | ci.subresourceRange.baseMipLevel = 0; | ||
| 214 | ci.subresourceRange.levelCount = 1; | ||
| 215 | ci.subresourceRange.baseArrayLayer = 0; | ||
| 216 | ci.subresourceRange.layerCount = 1; | ||
| 196 | 217 | ||
| 197 | image_views.resize(image_count); | 218 | image_views.resize(image_count); |
| 198 | for (std::size_t i = 0; i < image_count; i++) { | 219 | for (std::size_t i = 0; i < image_count; i++) { |
| 199 | const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D, | 220 | ci.image = images[i]; |
| 200 | image_format, {}, | 221 | image_views[i] = device.GetLogical().CreateImageView(ci); |
| 201 | {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}); | ||
| 202 | image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld); | ||
| 203 | } | 222 | } |
| 204 | } | 223 | } |
| 205 | 224 | ||
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 2f3b2ccd5..a35d61345 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h | |||
| @@ -7,7 +7,7 @@ | |||
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/renderer_vulkan/declarations.h" | 10 | #include "video_core/renderer_vulkan/wrapper.h" |
| 11 | 11 | ||
| 12 | namespace Layout { | 12 | namespace Layout { |
| 13 | struct FramebufferLayout; | 13 | struct FramebufferLayout; |
| @@ -20,7 +20,7 @@ class VKFence; | |||
| 20 | 20 | ||
| 21 | class VKSwapchain { | 21 | class VKSwapchain { |
| 22 | public: | 22 | public: |
| 23 | explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device); | 23 | explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device); |
| 24 | ~VKSwapchain(); | 24 | ~VKSwapchain(); |
| 25 | 25 | ||
| 26 | /// Creates (or recreates) the swapchain with a given size. | 26 | /// Creates (or recreates) the swapchain with a given size. |
| @@ -31,12 +31,12 @@ public: | |||
| 31 | 31 | ||
| 32 | /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be | 32 | /// Presents the rendered image to the swapchain. Returns true when the swapchain had to be |
| 33 | /// recreated. Takes responsibility for the ownership of the fence. | 33 | /// recreated. Takes responsibility for the ownership of the fence. |
| 34 | bool Present(vk::Semaphore render_semaphore, VKFence& fence); | 34 | bool Present(VkSemaphore render_semaphore, VKFence& fence); |
| 35 | 35 | ||
| 36 | /// Returns true when the framebuffer layout has changed. | 36 | /// Returns true when the framebuffer layout has changed. |
| 37 | bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; | 37 | bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; |
| 38 | 38 | ||
| 39 | const vk::Extent2D& GetSize() const { | 39 | VkExtent2D GetSize() const { |
| 40 | return extent; | 40 | return extent; |
| 41 | } | 41 | } |
| 42 | 42 | ||
| @@ -48,15 +48,15 @@ public: | |||
| 48 | return image_index; | 48 | return image_index; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | vk::Image GetImageIndex(std::size_t index) const { | 51 | VkImage GetImageIndex(std::size_t index) const { |
| 52 | return images[index]; | 52 | return images[index]; |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | vk::ImageView GetImageViewIndex(std::size_t index) const { | 55 | VkImageView GetImageViewIndex(std::size_t index) const { |
| 56 | return *image_views[index]; | 56 | return *image_views[index]; |
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | vk::Format GetImageFormat() const { | 59 | VkFormat GetImageFormat() const { |
| 60 | return image_format; | 60 | return image_format; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| @@ -65,30 +65,30 @@ public: | |||
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | private: | 67 | private: |
| 68 | void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, | 68 | void CreateSwapchain(const VkSurfaceCapabilitiesKHR& capabilities, u32 width, u32 height, |
| 69 | bool srgb); | 69 | bool srgb); |
| 70 | void CreateSemaphores(); | 70 | void CreateSemaphores(); |
| 71 | void CreateImageViews(); | 71 | void CreateImageViews(); |
| 72 | 72 | ||
| 73 | void Destroy(); | 73 | void Destroy(); |
| 74 | 74 | ||
| 75 | const vk::SurfaceKHR surface; | 75 | const VkSurfaceKHR surface; |
| 76 | const VKDevice& device; | 76 | const VKDevice& device; |
| 77 | 77 | ||
| 78 | UniqueSwapchainKHR swapchain; | 78 | vk::SwapchainKHR swapchain; |
| 79 | 79 | ||
| 80 | std::size_t image_count{}; | 80 | std::size_t image_count{}; |
| 81 | std::vector<vk::Image> images; | 81 | std::vector<VkImage> images; |
| 82 | std::vector<UniqueImageView> image_views; | 82 | std::vector<vk::ImageView> image_views; |
| 83 | std::vector<UniqueFramebuffer> framebuffers; | 83 | std::vector<vk::Framebuffer> framebuffers; |
| 84 | std::vector<VKFence*> fences; | 84 | std::vector<VKFence*> fences; |
| 85 | std::vector<UniqueSemaphore> present_semaphores; | 85 | std::vector<vk::Semaphore> present_semaphores; |
| 86 | 86 | ||
| 87 | u32 image_index{}; | 87 | u32 image_index{}; |
| 88 | u32 frame_index{}; | 88 | u32 frame_index{}; |
| 89 | 89 | ||
| 90 | vk::Format image_format{}; | 90 | VkFormat image_format{}; |
| 91 | vk::Extent2D extent{}; | 91 | VkExtent2D extent{}; |
| 92 | 92 | ||
| 93 | u32 current_width{}; | 93 | u32 current_width{}; |
| 94 | u32 current_height{}; | 94 | u32 current_height{}; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 26175921b..de4c23120 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include "core/memory.h" | 17 | #include "core/memory.h" |
| 18 | #include "video_core/engines/maxwell_3d.h" | 18 | #include "video_core/engines/maxwell_3d.h" |
| 19 | #include "video_core/morton.h" | 19 | #include "video_core/morton.h" |
| 20 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 21 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 20 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 22 | #include "video_core/renderer_vulkan/vk_device.h" | 21 | #include "video_core/renderer_vulkan/vk_device.h" |
| 23 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 22 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| @@ -25,6 +24,7 @@ | |||
| 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 24 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 26 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 25 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 27 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 26 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 27 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 28 | #include "video_core/surface.h" | 28 | #include "video_core/surface.h" |
| 29 | #include "video_core/textures/convert.h" | 29 | #include "video_core/textures/convert.h" |
| 30 | 30 | ||
| @@ -35,23 +35,22 @@ using VideoCore::MortonSwizzleMode; | |||
| 35 | 35 | ||
| 36 | using Tegra::Texture::SwizzleSource; | 36 | using Tegra::Texture::SwizzleSource; |
| 37 | using VideoCore::Surface::PixelFormat; | 37 | using VideoCore::Surface::PixelFormat; |
| 38 | using VideoCore::Surface::SurfaceCompression; | ||
| 39 | using VideoCore::Surface::SurfaceTarget; | 38 | using VideoCore::Surface::SurfaceTarget; |
| 40 | 39 | ||
| 41 | namespace { | 40 | namespace { |
| 42 | 41 | ||
| 43 | vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { | 42 | VkImageType SurfaceTargetToImage(SurfaceTarget target) { |
| 44 | switch (target) { | 43 | switch (target) { |
| 45 | case SurfaceTarget::Texture1D: | 44 | case SurfaceTarget::Texture1D: |
| 46 | case SurfaceTarget::Texture1DArray: | 45 | case SurfaceTarget::Texture1DArray: |
| 47 | return vk::ImageType::e1D; | 46 | return VK_IMAGE_TYPE_1D; |
| 48 | case SurfaceTarget::Texture2D: | 47 | case SurfaceTarget::Texture2D: |
| 49 | case SurfaceTarget::Texture2DArray: | 48 | case SurfaceTarget::Texture2DArray: |
| 50 | case SurfaceTarget::TextureCubemap: | 49 | case SurfaceTarget::TextureCubemap: |
| 51 | case SurfaceTarget::TextureCubeArray: | 50 | case SurfaceTarget::TextureCubeArray: |
| 52 | return vk::ImageType::e2D; | 51 | return VK_IMAGE_TYPE_2D; |
| 53 | case SurfaceTarget::Texture3D: | 52 | case SurfaceTarget::Texture3D: |
| 54 | return vk::ImageType::e3D; | 53 | return VK_IMAGE_TYPE_3D; |
| 55 | case SurfaceTarget::TextureBuffer: | 54 | case SurfaceTarget::TextureBuffer: |
| 56 | UNREACHABLE(); | 55 | UNREACHABLE(); |
| 57 | return {}; | 56 | return {}; |
| @@ -60,35 +59,35 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { | |||
| 60 | return {}; | 59 | return {}; |
| 61 | } | 60 | } |
| 62 | 61 | ||
| 63 | vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { | 62 | VkImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { |
| 64 | if (pixel_format < PixelFormat::MaxColorFormat) { | 63 | if (pixel_format < PixelFormat::MaxColorFormat) { |
| 65 | return vk::ImageAspectFlagBits::eColor; | 64 | return VK_IMAGE_ASPECT_COLOR_BIT; |
| 66 | } else if (pixel_format < PixelFormat::MaxDepthFormat) { | 65 | } else if (pixel_format < PixelFormat::MaxDepthFormat) { |
| 67 | return vk::ImageAspectFlagBits::eDepth; | 66 | return VK_IMAGE_ASPECT_DEPTH_BIT; |
| 68 | } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { | 67 | } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { |
| 69 | return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; | 68 | return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; |
| 70 | } else { | 69 | } else { |
| 71 | UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format)); | 70 | UNREACHABLE_MSG("Invalid pixel format={}", static_cast<int>(pixel_format)); |
| 72 | return vk::ImageAspectFlagBits::eColor; | 71 | return VK_IMAGE_ASPECT_COLOR_BIT; |
| 73 | } | 72 | } |
| 74 | } | 73 | } |
| 75 | 74 | ||
| 76 | vk::ImageViewType GetImageViewType(SurfaceTarget target) { | 75 | VkImageViewType GetImageViewType(SurfaceTarget target) { |
| 77 | switch (target) { | 76 | switch (target) { |
| 78 | case SurfaceTarget::Texture1D: | 77 | case SurfaceTarget::Texture1D: |
| 79 | return vk::ImageViewType::e1D; | 78 | return VK_IMAGE_VIEW_TYPE_1D; |
| 80 | case SurfaceTarget::Texture2D: | 79 | case SurfaceTarget::Texture2D: |
| 81 | return vk::ImageViewType::e2D; | 80 | return VK_IMAGE_VIEW_TYPE_2D; |
| 82 | case SurfaceTarget::Texture3D: | 81 | case SurfaceTarget::Texture3D: |
| 83 | return vk::ImageViewType::e3D; | 82 | return VK_IMAGE_VIEW_TYPE_3D; |
| 84 | case SurfaceTarget::Texture1DArray: | 83 | case SurfaceTarget::Texture1DArray: |
| 85 | return vk::ImageViewType::e1DArray; | 84 | return VK_IMAGE_VIEW_TYPE_1D_ARRAY; |
| 86 | case SurfaceTarget::Texture2DArray: | 85 | case SurfaceTarget::Texture2DArray: |
| 87 | return vk::ImageViewType::e2DArray; | 86 | return VK_IMAGE_VIEW_TYPE_2D_ARRAY; |
| 88 | case SurfaceTarget::TextureCubemap: | 87 | case SurfaceTarget::TextureCubemap: |
| 89 | return vk::ImageViewType::eCube; | 88 | return VK_IMAGE_VIEW_TYPE_CUBE; |
| 90 | case SurfaceTarget::TextureCubeArray: | 89 | case SurfaceTarget::TextureCubeArray: |
| 91 | return vk::ImageViewType::eCubeArray; | 90 | return VK_IMAGE_VIEW_TYPE_CUBE_ARRAY; |
| 92 | case SurfaceTarget::TextureBuffer: | 91 | case SurfaceTarget::TextureBuffer: |
| 93 | break; | 92 | break; |
| 94 | } | 93 | } |
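Annotation: the aspect flags computed from the pixel format range feed every VkImageSubresourceRange the surface later builds for views and barriers, which is why depth-stencil formats must return both bits. A small sketch of that downstream use; MakeFullRange is illustrative, not a function from the patch:

    #include <cstdint>
    #include <vulkan/vulkan.h>

    VkImageSubresourceRange MakeFullRange(VkImageAspectFlags aspect, uint32_t levels,
                                          uint32_t layers) {
        VkImageSubresourceRange range;
        range.aspectMask = aspect; // e.g. DEPTH | STENCIL for depth-stencil surfaces
        range.baseMipLevel = 0;
        range.levelCount = levels;
        range.baseArrayLayer = 0;
        range.layerCount = layers;
        return range;
    }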
| @@ -96,71 +95,88 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) { | |||
| 96 | return {}; | 95 | return {}; |
| 97 | } | 96 | } |
| 98 | 97 | ||
| 99 | UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { | 98 | vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, |
| 99 | std::size_t host_memory_size) { | ||
| 100 | // TODO(Rodrigo): Move texture buffer creation to the buffer cache | 100 | // TODO(Rodrigo): Move texture buffer creation to the buffer cache |
| 101 | const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(), | 101 | VkBufferCreateInfo ci; |
| 102 | vk::BufferUsageFlagBits::eUniformTexelBuffer | | 102 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| 103 | vk::BufferUsageFlagBits::eTransferSrc | | 103 | ci.pNext = nullptr; |
| 104 | vk::BufferUsageFlagBits::eTransferDst, | 104 | ci.flags = 0; |
| 105 | vk::SharingMode::eExclusive, 0, nullptr); | 105 | ci.size = static_cast<VkDeviceSize>(host_memory_size); |
| 106 | const auto dev = device.GetLogical(); | 106 | ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | |
| 107 | const auto& dld = device.GetDispatchLoader(); | 107 | VK_BUFFER_USAGE_TRANSFER_DST_BIT; |
| 108 | return dev.createBufferUnique(buffer_ci, nullptr, dld); | 108 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; |
| 109 | ci.queueFamilyIndexCount = 0; | ||
| 110 | ci.pQueueFamilyIndices = nullptr; | ||
| 111 | return device.GetLogical().CreateBuffer(ci); | ||
| 109 | } | 112 | } |
| 110 | 113 | ||
| 111 | vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, | 114 | VkBufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, |
| 112 | const SurfaceParams& params, | 115 | const SurfaceParams& params, VkBuffer buffer, |
| 113 | vk::Buffer buffer) { | 116 | std::size_t host_memory_size) { |
| 114 | ASSERT(params.IsBuffer()); | 117 | ASSERT(params.IsBuffer()); |
| 115 | 118 | ||
| 116 | const auto format = | 119 | VkBufferViewCreateInfo ci; |
| 117 | MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; | 120 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO; |
| 118 | return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes()); | 121 | ci.pNext = nullptr; |
| 122 | ci.flags = 0; | ||
| 123 | ci.buffer = buffer; | ||
| 124 | ci.format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; | ||
| 125 | ci.offset = 0; | ||
| 126 | ci.range = static_cast<VkDeviceSize>(host_memory_size); | ||
| 127 | return ci; | ||
| 119 | } | 128 | } |
| 120 | 129 | ||
| 121 | vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { | 130 | VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { |
| 122 | constexpr auto sample_count = vk::SampleCountFlagBits::e1; | ||
| 123 | constexpr auto tiling = vk::ImageTiling::eOptimal; | ||
| 124 | |||
| 125 | ASSERT(!params.IsBuffer()); | 131 | ASSERT(!params.IsBuffer()); |
| 126 | 132 | ||
| 127 | const auto [format, attachable, storage] = | 133 | const auto [format, attachable, storage] = |
| 128 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); | 134 | MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); |
| 129 | 135 | ||
| 130 | auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | | 136 | VkImageCreateInfo ci; |
| 131 | vk::ImageUsageFlagBits::eTransferSrc; | 137 | ci.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; |
| 138 | ci.pNext = nullptr; | ||
| 139 | ci.flags = 0; | ||
| 140 | ci.imageType = SurfaceTargetToImage(params.target); | ||
| 141 | ci.format = format; | ||
| 142 | ci.mipLevels = params.num_levels; | ||
| 143 | ci.arrayLayers = static_cast<u32>(params.GetNumLayers()); | ||
| 144 | ci.samples = VK_SAMPLE_COUNT_1_BIT; | ||
| 145 | ci.tiling = VK_IMAGE_TILING_OPTIMAL; | ||
| 146 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; | ||
| 147 | ci.queueFamilyIndexCount = 0; | ||
| 148 | ci.pQueueFamilyIndices = nullptr; | ||
| 149 | ci.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; | ||
| 150 | |||
| 151 | ci.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | | ||
| 152 | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; | ||
| 132 | if (attachable) { | 153 | if (attachable) { |
| 133 | image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment | 154 | ci.usage |= params.IsPixelFormatZeta() ? VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
| 134 | : vk::ImageUsageFlagBits::eColorAttachment; | 155 | : VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; |
| 135 | } | 156 | } |
| 136 | if (storage) { | 157 | if (storage) { |
| 137 | image_usage |= vk::ImageUsageFlagBits::eStorage; | 158 | ci.usage |= VK_IMAGE_USAGE_STORAGE_BIT; |
| 138 | } | 159 | } |
| 139 | 160 | ||
| 140 | vk::ImageCreateFlags flags; | ||
| 141 | vk::Extent3D extent; | ||
| 142 | switch (params.target) { | 161 | switch (params.target) { |
| 143 | case SurfaceTarget::TextureCubemap: | 162 | case SurfaceTarget::TextureCubemap: |
| 144 | case SurfaceTarget::TextureCubeArray: | 163 | case SurfaceTarget::TextureCubeArray: |
| 145 | flags |= vk::ImageCreateFlagBits::eCubeCompatible; | 164 | ci.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT; |
| 146 | [[fallthrough]]; | 165 | [[fallthrough]]; |
| 147 | case SurfaceTarget::Texture1D: | 166 | case SurfaceTarget::Texture1D: |
| 148 | case SurfaceTarget::Texture1DArray: | 167 | case SurfaceTarget::Texture1DArray: |
| 149 | case SurfaceTarget::Texture2D: | 168 | case SurfaceTarget::Texture2D: |
| 150 | case SurfaceTarget::Texture2DArray: | 169 | case SurfaceTarget::Texture2DArray: |
| 151 | extent = vk::Extent3D(params.width, params.height, 1); | 170 | ci.extent = {params.width, params.height, 1}; |
| 152 | break; | 171 | break; |
| 153 | case SurfaceTarget::Texture3D: | 172 | case SurfaceTarget::Texture3D: |
| 154 | extent = vk::Extent3D(params.width, params.height, params.depth); | 173 | ci.extent = {params.width, params.height, params.depth}; |
| 155 | break; | 174 | break; |
| 156 | case SurfaceTarget::TextureBuffer: | 175 | case SurfaceTarget::TextureBuffer: |
| 157 | UNREACHABLE(); | 176 | UNREACHABLE(); |
| 158 | } | 177 | } |
| 159 | 178 | ||
| 160 | return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent, | 179 | return ci; |
| 161 | params.num_levels, static_cast<u32>(params.GetNumLayers()), | ||
| 162 | sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0, | ||
| 163 | nullptr, vk::ImageLayout::eUndefined); | ||
| 164 | } | 180 | } |
| 165 | 181 | ||
| 166 | } // Anonymous namespace | 182 | } // Anonymous namespace |
| @@ -169,19 +185,18 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, | |||
| 169 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, | 185 | VKResourceManager& resource_manager, VKMemoryManager& memory_manager, |
| 170 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool, | 186 | VKScheduler& scheduler, VKStagingBufferPool& staging_pool, |
| 171 | GPUVAddr gpu_addr, const SurfaceParams& params) | 187 | GPUVAddr gpu_addr, const SurfaceParams& params) |
| 172 | : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device}, | 188 | : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system}, |
| 173 | resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, | 189 | device{device}, resource_manager{resource_manager}, |
| 174 | staging_pool{staging_pool} { | 190 | memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { |
| 175 | if (params.IsBuffer()) { | 191 | if (params.IsBuffer()) { |
| 176 | buffer = CreateBuffer(device, params); | 192 | buffer = CreateBuffer(device, params, host_memory_size); |
| 177 | commit = memory_manager.Commit(*buffer, false); | 193 | commit = memory_manager.Commit(buffer, false); |
| 178 | 194 | ||
| 179 | const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer); | 195 | const auto buffer_view_ci = |
| 196 | GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); | ||
| 180 | format = buffer_view_ci.format; | 197 | format = buffer_view_ci.format; |
| 181 | 198 | ||
| 182 | const auto dev = device.GetLogical(); | 199 | buffer_view = device.GetLogical().CreateBufferView(buffer_view_ci); |
| 183 | const auto& dld = device.GetDispatchLoader(); | ||
| 184 | buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld); | ||
| 185 | } else { | 200 | } else { |
| 186 | const auto image_ci = GenerateImageCreateInfo(device, params); | 201 | const auto image_ci = GenerateImageCreateInfo(device, params); |
| 187 | format = image_ci.format; | 202 | format = image_ci.format; |
| @@ -219,16 +234,15 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { | |||
| 219 | // We can't copy images to buffers inside a renderpass | 234 | // We can't copy images to buffers inside a renderpass |
| 220 | scheduler.RequestOutsideRenderPassOperationContext(); | 235 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 221 | 236 | ||
| 222 | FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, | 237 | FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, |
| 223 | vk::ImageLayout::eTransferSrcOptimal); | 238 | VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); |
| 224 | 239 | ||
| 225 | const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | 240 | const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); |
| 226 | // TODO(Rodrigo): Do this in a single copy | 241 | // TODO(Rodrigo): Do this in a single copy |
| 227 | for (u32 level = 0; level < params.num_levels; ++level) { | 242 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 228 | scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle, | 243 | scheduler.Record([image = *image->GetHandle(), buffer = *buffer.handle, |
| 229 | copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) { | 244 | copy = GetBufferImageCopy(level)](vk::CommandBuffer cmdbuf) { |
| 230 | cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy}, | 245 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); |
| 231 | dld); | ||
| 232 | }); | 246 | }); |
| 233 | } | 247 | } |
| 234 | scheduler.Finish(); | 248 | scheduler.Finish(); |
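The loop above records one copy command per mip level. The TODO could plausibly be resolved by batching the regions, since the raw vkCmdCopyImageToBuffer accepts an array of VkBufferImageCopy; a sketch under that assumption (raw_cmdbuf is a hypothetical VkCommandBuffer, outside the scheduler wrapper):

    std::vector<VkBufferImageCopy> copies;
    copies.reserve(params.num_levels);
    for (u32 level = 0; level < params.num_levels; ++level) {
        copies.push_back(GetBufferImageCopy(level));
    }
    // One command covering every mip level instead of num_levels commands.
    vkCmdCopyImageToBuffer(raw_cmdbuf, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer,
                           static_cast<uint32_t>(copies.size()), copies.data());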
| @@ -255,15 +269,27 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { | |||
| 255 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); | 269 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); |
| 256 | 270 | ||
| 257 | scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, | 271 | scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, |
| 258 | size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) { | 272 | size = host_memory_size](vk::CommandBuffer cmdbuf) { |
| 259 | const vk::BufferCopy copy(0, 0, size); | 273 | VkBufferCopy copy; |
| 260 | cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld); | 274 | copy.srcOffset = 0; |
| 261 | 275 | copy.dstOffset = 0; | |
| 262 | cmdbuf.pipelineBarrier( | 276 | copy.size = size; |
| 263 | vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {}, | 277 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); |
| 264 | {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, | 278 | |
| 265 | vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)}, | 279 | VkBufferMemoryBarrier barrier; |
| 266 | {}, dld); | 280 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| 281 | barrier.pNext = nullptr; | ||
| 282 | barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; ||
| 283 | barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; ||
| 284 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; ||
| 285 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; ||
| 288 | barrier.buffer = dst_buffer; | ||
| 289 | barrier.offset = 0; | ||
| 290 | barrier.size = size; | ||
| 291 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, | ||
| 292 | 0, {}, barrier, {}); | ||
| 267 | }); | 293 | }); |
| 268 | } | 294 | } |
| 269 | 295 | ||
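Two flag families meet in that hand-written barrier and are easy to cross: VK_ACCESS_* values belong in srcAccessMask/dstAccessMask, VK_PIPELINE_STAGE_* values belong in the barrier command's stage arguments, and VK_QUEUE_FAMILY_IGNORED marks the queue family fields when no ownership transfer is intended. The raw call the wrapper's PipelineBarrier presumably forwards to, with command_buffer as a placeholder handle:

    // Raw equivalent of the recorded barrier above: make transfer writes to
    // the buffer visible to subsequent vertex-shader reads.
    vkCmdPipelineBarrier(command_buffer,
                         VK_PIPELINE_STAGE_TRANSFER_BIT,      // src stage: the copy
                         VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, // dst stage: the reads
                         0,                                   // no dependency flags
                         0, nullptr,                          // no global memory barriers
                         1, &barrier,                         // the buffer barrier above
                         0, nullptr);                         // no image barriers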
| @@ -271,46 +297,49 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { | |||
| 271 | const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); | 297 | const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); |
| 272 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); | 298 | std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); |
| 273 | 299 | ||
| 274 | FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite, | 300 | FullTransition(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, |
| 275 | vk::ImageLayout::eTransferDstOptimal); | 301 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); |
| 276 | 302 | ||
| 277 | for (u32 level = 0; level < params.num_levels; ++level) { | 303 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 278 | vk::BufferImageCopy copy = GetBufferImageCopy(level); | 304 | const VkBufferImageCopy copy = GetBufferImageCopy(level); |
| 279 | if (image->GetAspectMask() == | 305 | if (image->GetAspectMask() == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| 280 | (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { | 306 | scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), |
| 281 | vk::BufferImageCopy depth = copy; | 307 | copy](vk::CommandBuffer cmdbuf) { |
| 282 | vk::BufferImageCopy stencil = copy; | 308 | std::array<VkBufferImageCopy, 2> copies = {copy, copy}; |
| 283 | depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; | 309 | copies[0].imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; |
| 284 | stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; | 310 | copies[1].imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; |
| 285 | scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth, | 311 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| 286 | stencil](auto cmdbuf, auto& dld) { | 312 | copies); |
| 287 | cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, | ||
| 288 | {depth, stencil}, dld); | ||
| 289 | }); | 313 | }); |
| 290 | } else { | 314 | } else { |
| 291 | scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), | 315 | scheduler.Record([buffer = *src_buffer.handle, image = *image->GetHandle(), |
| 292 | copy](auto cmdbuf, auto& dld) { | 316 | copy](vk::CommandBuffer cmdbuf) { |
| 293 | cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, | 317 | cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); |
| 294 | {copy}, dld); | ||
| 295 | }); | 318 | }); |
| 296 | } | 319 | } |
| 297 | } | 320 | } |
| 298 | } | 321 | } |
| 299 | 322 | ||
| 300 | vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { | 323 | VkBufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { |
| 301 | const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; | 324 | VkBufferImageCopy copy; |
| 302 | const auto compression_type = params.GetCompressionType(); | 325 | copy.bufferOffset = params.GetHostMipmapLevelOffset(level, is_converted); |
| 303 | const std::size_t mip_offset = compression_type == SurfaceCompression::Converted | 326 | copy.bufferRowLength = 0; |
| 304 | ? params.GetConvertedMipmapOffset(level) | 327 | copy.bufferImageHeight = 0; |
| 305 | : params.GetHostMipmapLevelOffset(level); | 328 | copy.imageSubresource.aspectMask = image->GetAspectMask(); |
| 306 | 329 | copy.imageSubresource.mipLevel = level; | |
| 307 | return vk::BufferImageCopy( | 330 | copy.imageSubresource.baseArrayLayer = 0; |
| 308 | mip_offset, 0, 0, | 331 | copy.imageSubresource.layerCount = static_cast<u32>(params.GetNumLayers()); |
| 309 | {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0}, | 332 | copy.imageOffset.x = 0; |
| 310 | {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth}); | 333 | copy.imageOffset.y = 0; |
| 334 | copy.imageOffset.z = 0; | ||
| 335 | copy.imageExtent.width = params.GetMipWidth(level); | ||
| 336 | copy.imageExtent.height = params.GetMipHeight(level); | ||
| 337 | copy.imageExtent.depth = | ||
| 338 | params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; | ||
| 339 | return copy; | ||
| 311 | } | 340 | } |
| 312 | 341 | ||
| 313 | vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { | 342 | VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { |
| 314 | return {image->GetAspectMask(), 0, params.num_levels, 0, | 343 | return {image->GetAspectMask(), 0, params.num_levels, 0, |
| 315 | static_cast<u32>(params.GetNumLayers())}; | 344 | static_cast<u32>(params.GetNumLayers())}; |
| 316 | } | 345 | } |
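A zero bufferRowLength/bufferImageHeight in GetBufferImageCopy above means "tightly packed to imageExtent", not "empty". For an uncompressed format, the spec's addressing rule then reduces to the following (illustration only):

    #include <cstddef>
    #include <cstdint>

    // Byte offset of texel (x, y, z) inside the staging buffer when both
    // bufferRowLength and bufferImageHeight are 0 (rows and slices packed
    // tightly to the copied extent).
    constexpr std::size_t TexelOffset(std::size_t buffer_offset, std::uint32_t width,
                                      std::uint32_t height, std::size_t texel_size,
                                      std::uint32_t x, std::uint32_t y, std::uint32_t z) {
        return buffer_offset + ((std::size_t{z} * height + y) * width + x) * texel_size;
    }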
| @@ -322,12 +351,12 @@ CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surf | |||
| 322 | aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, | 351 | aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, |
| 323 | base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, | 352 | base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, |
| 324 | num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) | 353 | num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) |
| 325 | : vk::ImageViewType{}} {} | 354 | : VK_IMAGE_VIEW_TYPE_1D} {} |
| 326 | 355 | ||
| 327 | CachedSurfaceView::~CachedSurfaceView() = default; | 356 | CachedSurfaceView::~CachedSurfaceView() = default; |
| 328 | 357 | ||
| 329 | vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, | 358 | VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, |
| 330 | SwizzleSource z_source, SwizzleSource w_source) { | 359 | SwizzleSource z_source, SwizzleSource w_source) { |
| 331 | const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | 360 | const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); |
| 332 | if (last_image_view && last_swizzle == swizzle) { | 361 | if (last_image_view && last_swizzle == swizzle) { |
| 333 | return last_image_view; | 362 | return last_image_view; |
| @@ -352,37 +381,45 @@ vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource | |||
| 352 | 381 | ||
| 353 | // Games can sample depth or stencil values on textures. This is decided by the swizzle value on | 382 | // Games can sample depth or stencil values on textures. This is decided by the swizzle value on |
| 354 | // hardware. To emulate this on Vulkan we specify it in the aspect. | 383 | // hardware. To emulate this on Vulkan we specify it in the aspect. |
| 355 | vk::ImageAspectFlags aspect = aspect_mask; | 384 | VkImageAspectFlags aspect = aspect_mask; |
| 356 | if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { | 385 | if (aspect == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| 357 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); | 386 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); |
| 358 | const bool is_first = x_source == SwizzleSource::R; | 387 | const bool is_first = x_source == SwizzleSource::R; |
| 359 | switch (params.pixel_format) { | 388 | switch (params.pixel_format) { |
| 360 | case VideoCore::Surface::PixelFormat::Z24S8: | 389 | case VideoCore::Surface::PixelFormat::Z24S8: |
| 361 | case VideoCore::Surface::PixelFormat::Z32FS8: | 390 | case VideoCore::Surface::PixelFormat::Z32FS8: |
| 362 | aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil; | 391 | aspect = is_first ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_STENCIL_BIT; |
| 363 | break; | 392 | break; |
| 364 | case VideoCore::Surface::PixelFormat::S8Z24: | 393 | case VideoCore::Surface::PixelFormat::S8Z24: |
| 365 | aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth; | 394 | aspect = is_first ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT; |
| 366 | break; | 395 | break; |
| 367 | default: | 396 | default: |
| 368 | aspect = vk::ImageAspectFlagBits::eDepth; | 397 | aspect = VK_IMAGE_ASPECT_DEPTH_BIT; |
| 369 | UNIMPLEMENTED(); | 398 | UNIMPLEMENTED(); |
| 370 | } | 399 | } |
| 371 | 400 | ||
| 372 | // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity | 401 | // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity |
| 373 | swizzle_x = vk::ComponentSwizzle::eR; | 402 | swizzle_x = VK_COMPONENT_SWIZZLE_R; |
| 374 | swizzle_y = vk::ComponentSwizzle::eG; | 403 | swizzle_y = VK_COMPONENT_SWIZZLE_G; |
| 375 | swizzle_z = vk::ComponentSwizzle::eB; | 404 | swizzle_z = VK_COMPONENT_SWIZZLE_B; |
| 376 | swizzle_w = vk::ComponentSwizzle::eA; | 405 | swizzle_w = VK_COMPONENT_SWIZZLE_A; |
| 377 | } | 406 | } |
| 378 | 407 | ||
| 379 | const vk::ImageViewCreateInfo image_view_ci( | 408 | VkImageViewCreateInfo ci; |
| 380 | {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(), | 409 | ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; |
| 381 | {swizzle_x, swizzle_y, swizzle_z, swizzle_w}, | 410 | ci.pNext = nullptr; |
| 382 | {aspect, base_level, num_levels, base_layer, num_layers}); | 411 | ci.flags = 0; |
| 412 | ci.image = surface.GetImageHandle(); | ||
| 413 | ci.viewType = image_view_type; | ||
| 414 | ci.format = surface.GetImage().GetFormat(); | ||
| 415 | ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; | ||
| 416 | ci.subresourceRange.aspectMask = aspect; | ||
| 417 | ci.subresourceRange.baseMipLevel = base_level; | ||
| 418 | ci.subresourceRange.levelCount = num_levels; | ||
| 419 | ci.subresourceRange.baseArrayLayer = base_layer; | ||
| 420 | ci.subresourceRange.layerCount = num_layers; | ||
| 421 | image_view = device.GetLogical().CreateImageView(ci); | ||
| 383 | 422 | ||
| 384 | const auto dev = device.GetLogical(); | ||
| 385 | image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader()); | ||
| 386 | return last_image_view = *image_view; | 423 | return last_image_view = *image_view; |
| 387 | } | 424 | } |
| 388 | 425 | ||
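GetHandle caches one VkImageView per swizzle combination: a last-hit fast path, then the view_cache map keyed by the encoded swizzle. A compact sketch of the pattern; Encode is a hypothetical packing, since EncodeSwizzle's actual layout is not shown in this hunk:

    #include <cstdint>
    #include <unordered_map>
    #include <vulkan/vulkan.h>

    // Hypothetical 8-bits-per-source packing of the four swizzle sources.
    constexpr std::uint32_t Encode(std::uint32_t x, std::uint32_t y, std::uint32_t z,
                                   std::uint32_t w) {
        return (x << 24) | (y << 16) | (z << 8) | w;
    }

    // Memoization as used above: create the view only on a true cache miss.
    VkImageView GetOrCreateView(std::unordered_map<std::uint32_t, VkImageView>& cache,
                                std::uint32_t key, VkImageView (*create_view)(std::uint32_t)) {
        const auto [it, inserted] = cache.try_emplace(key, VK_NULL_HANDLE);
        if (inserted) {
            it->second = create_view(key); // fill a VkImageViewCreateInfo and create
        }
        return it->second;
    }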
| @@ -390,8 +427,9 @@ VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterf | |||
| 390 | const VKDevice& device, VKResourceManager& resource_manager, | 427 | const VKDevice& device, VKResourceManager& resource_manager, |
| 391 | VKMemoryManager& memory_manager, VKScheduler& scheduler, | 428 | VKMemoryManager& memory_manager, VKScheduler& scheduler, |
| 392 | VKStagingBufferPool& staging_pool) | 429 | VKStagingBufferPool& staging_pool) |
| 393 | : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager}, | 430 | : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device}, |
| 394 | memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {} | 431 | resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, |
| 432 | staging_pool{staging_pool} {} | ||
| 395 | 433 | ||
| 396 | VKTextureCache::~VKTextureCache() = default; | 434 | VKTextureCache::~VKTextureCache() = default; |
| 397 | 435 | ||
| @@ -418,25 +456,36 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, | |||
| 418 | scheduler.RequestOutsideRenderPassOperationContext(); | 456 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 419 | 457 | ||
| 420 | src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, | 458 | src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, |
| 421 | vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, | 459 | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, |
| 422 | vk::ImageLayout::eTransferSrcOptimal); | 460 | VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); |
| 423 | dst_surface->Transition( | 461 | dst_surface->Transition(dst_base_layer, num_layers, copy_params.dest_level, 1, |
| 424 | dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, | 462 | VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, |
| 425 | vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); | 463 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); |
| 426 | 464 | ||
| 427 | const vk::ImageSubresourceLayers src_subresource( | 465 | VkImageCopy copy; |
| 428 | src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); | 466 | copy.srcSubresource.aspectMask = src_surface->GetAspectMask(); |
| 429 | const vk::ImageSubresourceLayers dst_subresource( | 467 | copy.srcSubresource.mipLevel = copy_params.source_level; |
| 430 | dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers); | 468 | copy.srcSubresource.baseArrayLayer = copy_params.source_z; |
| 431 | const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0); | 469 | copy.srcSubresource.layerCount = num_layers; |
| 432 | const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z); | 470 | copy.srcOffset.x = copy_params.source_x; |
| 433 | const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z); | 471 | copy.srcOffset.y = copy_params.source_y; |
| 434 | const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent); | 472 | copy.srcOffset.z = 0; |
| 435 | const vk::Image src_image = src_surface->GetImageHandle(); | 473 | copy.dstSubresource.aspectMask = dst_surface->GetAspectMask(); |
| 436 | const vk::Image dst_image = dst_surface->GetImageHandle(); | 474 | copy.dstSubresource.mipLevel = copy_params.dest_level; |
| 437 | scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) { | 475 | copy.dstSubresource.baseArrayLayer = dst_base_layer; |
| 438 | cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, | 476 | copy.dstSubresource.layerCount = num_layers; |
| 439 | vk::ImageLayout::eTransferDstOptimal, {copy}, dld); | 477 | copy.dstOffset.x = copy_params.dest_x; |
| 478 | copy.dstOffset.y = copy_params.dest_y; | ||
| 479 | copy.dstOffset.z = dst_offset_z; | ||
| 480 | copy.extent.width = copy_params.width; | ||
| 481 | copy.extent.height = copy_params.height; | ||
| 482 | copy.extent.depth = extent_z; | ||
| 483 | |||
| 484 | const VkImage src_image = src_surface->GetImageHandle(); | ||
| 485 | const VkImage dst_image = dst_surface->GetImageHandle(); | ||
| 486 | scheduler.Record([src_image, dst_image, copy](vk::CommandBuffer cmdbuf) { | ||
| 487 | cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, | ||
| 488 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); | ||
| 440 | }); | 489 | }); |
| 441 | } | 490 | } |
| 442 | 491 | ||
| @@ -445,25 +494,34 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view, | |||
| 445 | // We can't blit inside a renderpass | 494 | // We can't blit inside a renderpass |
| 446 | scheduler.RequestOutsideRenderPassOperationContext(); | 495 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 447 | 496 | ||
| 448 | src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer, | 497 | src_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 449 | vk::AccessFlagBits::eTransferRead); | 498 | VK_ACCESS_TRANSFER_READ_BIT); |
| 450 | dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer, | 499 | dst_view->Transition(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 451 | vk::AccessFlagBits::eTransferWrite); | 500 | VK_ACCESS_TRANSFER_WRITE_BIT); |
| 452 | 501 | ||
| 453 | const auto& cfg = copy_config; | 502 | VkImageBlit blit; |
| 454 | const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0); | 503 | blit.srcSubresource = src_view->GetImageSubresourceLayers(); |
| 455 | const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1); | 504 | blit.srcOffsets[0].x = copy_config.src_rect.left; |
| 456 | const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0); | 505 | blit.srcOffsets[0].y = copy_config.src_rect.top; |
| 457 | const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1); | 506 | blit.srcOffsets[0].z = 0; |
| 458 | const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right}, | 507 | blit.srcOffsets[1].x = copy_config.src_rect.right; |
| 459 | dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); | 508 | blit.srcOffsets[1].y = copy_config.src_rect.bottom; |
| 509 | blit.srcOffsets[1].z = 1; | ||
| 510 | blit.dstSubresource = dst_view->GetImageSubresourceLayers(); | ||
| 511 | blit.dstOffsets[0].x = copy_config.dst_rect.left; | ||
| 512 | blit.dstOffsets[0].y = copy_config.dst_rect.top; | ||
| 513 | blit.dstOffsets[0].z = 0; | ||
| 514 | blit.dstOffsets[1].x = copy_config.dst_rect.right; | ||
| 515 | blit.dstOffsets[1].y = copy_config.dst_rect.bottom; | ||
| 516 | blit.dstOffsets[1].z = 1; | ||
| 517 | |||
| 460 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; | 518 | const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; |
| 461 | 519 | ||
| 462 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, | 520 | scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, |
| 463 | is_linear](auto cmdbuf, auto& dld) { | 521 | is_linear](vk::CommandBuffer cmdbuf) { |
| 464 | cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, | 522 | cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, |
| 465 | vk::ImageLayout::eTransferDstOptimal, {blit}, | 523 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit, |
| 466 | is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld); | 524 | is_linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); |
| 467 | }); | 525 | }); |
| 468 | } | 526 | } |
| 469 | 527 | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 22e3d34de..115595f28 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -13,10 +13,10 @@ | |||
| 13 | #include "common/math_util.h" | 13 | #include "common/math_util.h" |
| 14 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 15 | #include "video_core/rasterizer_cache.h" | 15 | #include "video_core/rasterizer_cache.h" |
| 16 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_image.h" | 16 | #include "video_core/renderer_vulkan/vk_image.h" |
| 18 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 17 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 18 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 19 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 20 | #include "video_core/texture_cache/surface_base.h" | 20 | #include "video_core/texture_cache/surface_base.h" |
| 21 | #include "video_core/texture_cache/texture_cache.h" | 21 | #include "video_core/texture_cache/texture_cache.h" |
| 22 | #include "video_core/textures/decoders.h" | 22 | #include "video_core/textures/decoders.h" |
| @@ -60,15 +60,15 @@ public: | |||
| 60 | void UploadTexture(const std::vector<u8>& staging_buffer) override; | 60 | void UploadTexture(const std::vector<u8>& staging_buffer) override; |
| 61 | void DownloadTexture(std::vector<u8>& staging_buffer) override; | 61 | void DownloadTexture(std::vector<u8>& staging_buffer) override; |
| 62 | 62 | ||
| 63 | void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, | 63 | void FullTransition(VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, |
| 64 | vk::ImageLayout new_layout) { | 64 | VkImageLayout new_layout) { |
| 65 | image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, | 65 | image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, |
| 66 | new_stage_mask, new_access, new_layout); | 66 | new_stage_mask, new_access, new_layout); |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, | 69 | void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, |
| 70 | vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, | 70 | VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access, |
| 71 | vk::ImageLayout new_layout) { | 71 | VkImageLayout new_layout) { |
| 72 | image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | 72 | image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, |
| 73 | new_access, new_layout); | 73 | new_access, new_layout); |
| 74 | } | 74 | } |
| @@ -81,15 +81,15 @@ public: | |||
| 81 | return *image; | 81 | return *image; |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | vk::Image GetImageHandle() const { | 84 | VkImage GetImageHandle() const { |
| 85 | return image->GetHandle(); | 85 | return *image->GetHandle(); |
| 86 | } | 86 | } |
| 87 | 87 | ||
| 88 | vk::ImageAspectFlags GetAspectMask() const { | 88 | VkImageAspectFlags GetAspectMask() const { |
| 89 | return image->GetAspectMask(); | 89 | return image->GetAspectMask(); |
| 90 | } | 90 | } |
| 91 | 91 | ||
| 92 | vk::BufferView GetBufferViewHandle() const { | 92 | VkBufferView GetBufferViewHandle() const { |
| 93 | return *buffer_view; | 93 | return *buffer_view; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| @@ -104,9 +104,9 @@ private: | |||
| 104 | 104 | ||
| 105 | void UploadImage(const std::vector<u8>& staging_buffer); | 105 | void UploadImage(const std::vector<u8>& staging_buffer); |
| 106 | 106 | ||
| 107 | vk::BufferImageCopy GetBufferImageCopy(u32 level) const; | 107 | VkBufferImageCopy GetBufferImageCopy(u32 level) const; |
| 108 | 108 | ||
| 109 | vk::ImageSubresourceRange GetImageSubresourceRange() const; | 109 | VkImageSubresourceRange GetImageSubresourceRange() const; |
| 110 | 110 | ||
| 111 | Core::System& system; | 111 | Core::System& system; |
| 112 | const VKDevice& device; | 112 | const VKDevice& device; |
| @@ -116,11 +116,11 @@ private: | |||
| 116 | VKStagingBufferPool& staging_pool; | 116 | VKStagingBufferPool& staging_pool; |
| 117 | 117 | ||
| 118 | std::optional<VKImage> image; | 118 | std::optional<VKImage> image; |
| 119 | UniqueBuffer buffer; | 119 | vk::Buffer buffer; |
| 120 | UniqueBufferView buffer_view; | 120 | vk::BufferView buffer_view; |
| 121 | VKMemoryCommit commit; | 121 | VKMemoryCommit commit; |
| 122 | 122 | ||
| 123 | vk::Format format; | 123 | VkFormat format = VK_FORMAT_UNDEFINED; |
| 124 | }; | 124 | }; |
| 125 | 125 | ||
| 126 | class CachedSurfaceView final : public VideoCommon::ViewBase { | 126 | class CachedSurfaceView final : public VideoCommon::ViewBase { |
| @@ -129,16 +129,16 @@ public: | |||
| 129 | const ViewParams& params, bool is_proxy); | 129 | const ViewParams& params, bool is_proxy); |
| 130 | ~CachedSurfaceView(); | 130 | ~CachedSurfaceView(); |
| 131 | 131 | ||
| 132 | vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source, | 132 | VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, |
| 133 | Tegra::Texture::SwizzleSource y_source, | 133 | Tegra::Texture::SwizzleSource y_source, |
| 134 | Tegra::Texture::SwizzleSource z_source, | 134 | Tegra::Texture::SwizzleSource z_source, |
| 135 | Tegra::Texture::SwizzleSource w_source); | 135 | Tegra::Texture::SwizzleSource w_source); |
| 136 | 136 | ||
| 137 | bool IsSameSurface(const CachedSurfaceView& rhs) const { | 137 | bool IsSameSurface(const CachedSurfaceView& rhs) const { |
| 138 | return &surface == &rhs.surface; | 138 | return &surface == &rhs.surface; |
| 139 | } | 139 | } |
| 140 | 140 | ||
| 141 | vk::ImageView GetHandle() { | 141 | VkImageView GetHandle() { |
| 142 | return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, | 142 | return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, |
| 143 | Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); | 143 | Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); |
| 144 | } | 144 | } |
| @@ -159,24 +159,24 @@ public: | |||
| 159 | return buffer_view; | 159 | return buffer_view; |
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | vk::Image GetImage() const { | 162 | VkImage GetImage() const { |
| 163 | return image; | 163 | return image; |
| 164 | } | 164 | } |
| 165 | 165 | ||
| 166 | vk::BufferView GetBufferView() const { | 166 | VkBufferView GetBufferView() const { |
| 167 | return buffer_view; | 167 | return buffer_view; |
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | vk::ImageSubresourceRange GetImageSubresourceRange() const { | 170 | VkImageSubresourceRange GetImageSubresourceRange() const { |
| 171 | return {aspect_mask, base_level, num_levels, base_layer, num_layers}; | 171 | return {aspect_mask, base_level, num_levels, base_layer, num_layers}; |
| 172 | } | 172 | } |
| 173 | 173 | ||
| 174 | vk::ImageSubresourceLayers GetImageSubresourceLayers() const { | 174 | VkImageSubresourceLayers GetImageSubresourceLayers() const { |
| 175 | return {surface.GetAspectMask(), base_level, base_layer, num_layers}; | 175 | return {surface.GetAspectMask(), base_level, base_layer, num_layers}; |
| 176 | } | 176 | } |
| 177 | 177 | ||
| 178 | void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask, | 178 | void Transition(VkImageLayout new_layout, VkPipelineStageFlags new_stage_mask, |
| 179 | vk::AccessFlags new_access) const { | 179 | VkAccessFlags new_access) const { |
| 180 | surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, | 180 | surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, |
| 181 | new_access, new_layout); | 181 | new_access, new_layout); |
| 182 | } | 182 | } |
| @@ -196,9 +196,9 @@ private: | |||
| 196 | 196 | ||
| 197 | // Store a copy of these values to avoid double dereference when reading them | 197 | // Store a copy of these values to avoid double dereference when reading them |
| 198 | const SurfaceParams params; | 198 | const SurfaceParams params; |
| 199 | const vk::Image image; | 199 | const VkImage image; |
| 200 | const vk::BufferView buffer_view; | 200 | const VkBufferView buffer_view; |
| 201 | const vk::ImageAspectFlags aspect_mask; | 201 | const VkImageAspectFlags aspect_mask; |
| 202 | 202 | ||
| 203 | const VKDevice& device; | 203 | const VKDevice& device; |
| 204 | CachedSurface& surface; | 204 | CachedSurface& surface; |
| @@ -206,12 +206,12 @@ private: | |||
| 206 | const u32 num_layers; | 206 | const u32 num_layers; |
| 207 | const u32 base_level; | 207 | const u32 base_level; |
| 208 | const u32 num_levels; | 208 | const u32 num_levels; |
| 209 | const vk::ImageViewType image_view_type; | 209 | const VkImageViewType image_view_type; |
| 210 | 210 | ||
| 211 | vk::ImageView last_image_view; | 211 | VkImageView last_image_view = nullptr; |
| 212 | u32 last_swizzle{}; | 212 | u32 last_swizzle = 0; |
| 213 | 213 | ||
| 214 | std::unordered_map<u32, UniqueImageView> view_cache; | 214 | std::unordered_map<u32, vk::ImageView> view_cache; |
| 215 | }; | 215 | }; |
| 216 | 216 | ||
| 217 | class VKTextureCache final : public TextureCacheBase { | 217 | class VKTextureCache final : public TextureCacheBase { |
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 0e577b9ff..4bfec0077 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp | |||
| @@ -7,10 +7,10 @@ | |||
| 7 | 7 | ||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | 10 | #include "video_core/renderer_vulkan/vk_device.h" |
| 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 13 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 12 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 14 | 14 | ||
| 15 | namespace Vulkan { | 15 | namespace Vulkan { |
| 16 | 16 | ||
| @@ -27,8 +27,8 @@ void VKUpdateDescriptorQueue::Acquire() { | |||
| 27 | entries.clear(); | 27 | entries.clear(); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template, | 30 | void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template, |
| 31 | vk::DescriptorSet set) { | 31 | VkDescriptorSet set) { |
| 32 | if (payload.size() + entries.size() >= payload.max_size()) { | 32 | if (payload.size() + entries.size() >= payload.max_size()) { |
| 33 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); | 33 | LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread"); |
| 34 | scheduler.WaitWorker(); | 34 | scheduler.WaitWorker(); |
| @@ -37,21 +37,21 @@ void VKUpdateDescriptorQueue::Send(vk::DescriptorUpdateTemplate update_template, | |||
| 37 | 37 | ||
| 38 | const auto payload_start = payload.data() + payload.size(); | 38 | const auto payload_start = payload.data() + payload.size(); |
| 39 | for (const auto& entry : entries) { | 39 | for (const auto& entry : entries) { |
| 40 | if (const auto image = std::get_if<vk::DescriptorImageInfo>(&entry)) { | 40 | if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { |
| 41 | payload.push_back(*image); | 41 | payload.push_back(*image); |
| 42 | } else if (const auto buffer = std::get_if<Buffer>(&entry)) { | 42 | } else if (const auto buffer = std::get_if<Buffer>(&entry)) { |
| 43 | payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size); | 43 | payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size); |
| 44 | } else if (const auto texel = std::get_if<vk::BufferView>(&entry)) { | 44 | } else if (const auto texel = std::get_if<VkBufferView>(&entry)) { |
| 45 | payload.push_back(*texel); | 45 | payload.push_back(*texel); |
| 46 | } else { | 46 | } else { |
| 47 | UNREACHABLE(); | 47 | UNREACHABLE(); |
| 48 | } | 48 | } |
| 49 | } | 49 | } |
| 50 | 50 | ||
| 51 | scheduler.Record([dev = device.GetLogical(), payload_start, set, | 51 | scheduler.Record( |
| 52 | update_template]([[maybe_unused]] auto cmdbuf, auto& dld) { | 52 | [payload_start, set, update_template, logical = &device.GetLogical()](vk::CommandBuffer) { |
| 53 | dev.updateDescriptorSetWithTemplate(set, update_template, payload_start, dld); | 53 | logical->UpdateDescriptorSet(set, update_template, payload_start); |
| 54 | }); | 54 | }); |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | } // namespace Vulkan | 57 | } // namespace Vulkan |
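Pieced together from the methods this diff touches, the per-draw call pattern for VKUpdateDescriptorQueue is presumably: Acquire a fresh batch, queue writes in the order the update template's entries expect, then Send. A hedged usage sketch with placeholder handles:

    // Sketch only: the Add* order is assumed to match the template layout.
    void WriteDrawDescriptors(Vulkan::VKUpdateDescriptorQueue& update_queue,
                              VkSampler sampler, VkImageView image_view,
                              const VkBuffer& uniform_buffer, VkBufferView texel_view,
                              VkDescriptorUpdateTemplateKHR update_template,
                              VkDescriptorSet set) {
        update_queue.Acquire();                            // start a new batch
        update_queue.AddSampledImage(sampler, image_view); // combined image sampler slot
        update_queue.AddBuffer(&uniform_buffer, 0, 256);   // 256 bytes at offset 0
        update_queue.AddTexelBuffer(texel_view);           // uniform texel buffer slot
        update_queue.Send(update_template, set);           // hand off to the worker
    }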
diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 8c825aa29..a9e3d5dba 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | #include <boost/container/static_vector.hpp> | 9 | #include <boost/container/static_vector.hpp> |
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "video_core/renderer_vulkan/declarations.h" | 12 | #include "video_core/renderer_vulkan/wrapper.h" |
| 13 | 13 | ||
| 14 | namespace Vulkan { | 14 | namespace Vulkan { |
| 15 | 15 | ||
| @@ -20,18 +20,18 @@ class DescriptorUpdateEntry { | |||
| 20 | public: | 20 | public: |
| 21 | explicit DescriptorUpdateEntry() : image{} {} | 21 | explicit DescriptorUpdateEntry() : image{} {} |
| 22 | 22 | ||
| 23 | DescriptorUpdateEntry(vk::DescriptorImageInfo image) : image{image} {} | 23 | DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} |
| 24 | 24 | ||
| 25 | DescriptorUpdateEntry(vk::Buffer buffer, vk::DeviceSize offset, vk::DeviceSize size) | 25 | DescriptorUpdateEntry(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) |
| 26 | : buffer{buffer, offset, size} {} | 26 | : buffer{buffer, offset, size} {} |
| 27 | 27 | ||
| 28 | DescriptorUpdateEntry(vk::BufferView texel_buffer) : texel_buffer{texel_buffer} {} | 28 | DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} |
| 29 | 29 | ||
| 30 | private: | 30 | private: |
| 31 | union { | 31 | union { |
| 32 | vk::DescriptorImageInfo image; | 32 | VkDescriptorImageInfo image; |
| 33 | vk::DescriptorBufferInfo buffer; | 33 | VkDescriptorBufferInfo buffer; |
| 34 | vk::BufferView texel_buffer; | 34 | VkBufferView texel_buffer; |
| 35 | }; | 35 | }; |
| 36 | }; | 36 | }; |
| 37 | 37 | ||
| @@ -44,37 +44,35 @@ public: | |||
| 44 | 44 | ||
| 45 | void Acquire(); | 45 | void Acquire(); |
| 46 | 46 | ||
| 47 | void Send(vk::DescriptorUpdateTemplate update_template, vk::DescriptorSet set); | 47 | void Send(VkDescriptorUpdateTemplateKHR update_template, VkDescriptorSet set); |
| 48 | 48 | ||
| 49 | void AddSampledImage(vk::Sampler sampler, vk::ImageView image_view) { | 49 | void AddSampledImage(VkSampler sampler, VkImageView image_view) { |
| 50 | entries.emplace_back(vk::DescriptorImageInfo{sampler, image_view, {}}); | 50 | entries.emplace_back(VkDescriptorImageInfo{sampler, image_view, {}}); |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | void AddImage(vk::ImageView image_view) { | 53 | void AddImage(VkImageView image_view) { |
| 54 | entries.emplace_back(vk::DescriptorImageInfo{{}, image_view, {}}); | 54 | entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); |
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | void AddBuffer(const vk::Buffer* buffer, u64 offset, std::size_t size) { | 57 | void AddBuffer(const VkBuffer* buffer, u64 offset, std::size_t size) { |
| 58 | entries.push_back(Buffer{buffer, offset, size}); | 58 | entries.push_back(Buffer{buffer, offset, size}); |
| 59 | } | 59 | } |
| 60 | 60 | ||
| 61 | void AddTexelBuffer(vk::BufferView texel_buffer) { | 61 | void AddTexelBuffer(VkBufferView texel_buffer) { |
| 62 | entries.emplace_back(texel_buffer); | 62 | entries.emplace_back(texel_buffer); |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | vk::ImageLayout* GetLastImageLayout() { | 65 | VkImageLayout* GetLastImageLayout() { |
| 66 | return &std::get<vk::DescriptorImageInfo>(entries.back()).imageLayout; | 66 | return &std::get<VkDescriptorImageInfo>(entries.back()).imageLayout; |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | private: | 69 | private: |
| 70 | struct Buffer { | 70 | struct Buffer { |
| 71 | const vk::Buffer* buffer{}; | 71 | const VkBuffer* buffer = nullptr; |
| 72 | u64 offset{}; | 72 | u64 offset = 0; |
| 73 | std::size_t size{}; | 73 | std::size_t size = 0; |
| 74 | }; | 74 | }; |
| 75 | using Variant = std::variant<vk::DescriptorImageInfo, Buffer, vk::BufferView>; | 75 | using Variant = std::variant<VkDescriptorImageInfo, Buffer, VkBufferView>; |
| 76 | // Old gcc versions don't consider this trivially copyable. | ||
| 77 | // static_assert(std::is_trivially_copyable_v<Variant>); | ||
| 78 | 76 | ||
| 79 | const VKDevice& device; | 77 | const VKDevice& device; |
| 80 | VKScheduler& scheduler; | 78 | VKScheduler& scheduler; |
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 478394682..4db329fa5 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -136,7 +136,8 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | |||
| 136 | SetRegister(bb, instr.gpr0, value); | 136 | SetRegister(bb, instr.gpr0, value); |
| 137 | break; | 137 | break; |
| 138 | } | 138 | } |
| 139 | case OpCode::Id::FCMP_R: { | 139 | case OpCode::Id::FCMP_RR: |
| 140 | case OpCode::Id::FCMP_RC: { | ||
| 140 | UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); | 141 | UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); |
| 141 | Node op_c = GetRegister(instr.gpr39); | 142 | Node op_c = GetRegister(instr.gpr39); |
| 142 | Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); | 143 | Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); |
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index c72690b2b..b9989c88c 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -2,6 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <limits> | ||
| 6 | #include <optional> | ||
| 7 | #include <utility> | ||
| 8 | |||
| 5 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 6 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 7 | #include "video_core/engines/shader_bytecode.h" | 11 | #include "video_core/engines/shader_bytecode.h" |
| @@ -15,9 +19,49 @@ using Tegra::Shader::OpCode; | |||
| 15 | using Tegra::Shader::Register; | 19 | using Tegra::Shader::Register; |
| 16 | 20 | ||
| 17 | namespace { | 21 | namespace { |
| 22 | |||
| 18 | constexpr OperationCode GetFloatSelector(u64 selector) { | 23 | constexpr OperationCode GetFloatSelector(u64 selector) { |
| 19 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; | 24 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; |
| 20 | } | 25 | } |
| 26 | |||
| 27 | constexpr u32 SizeInBits(Register::Size size) { | ||
| 28 | switch (size) { | ||
| 29 | case Register::Size::Byte: | ||
| 30 | return 8; | ||
| 31 | case Register::Size::Short: | ||
| 32 | return 16; | ||
| 33 | case Register::Size::Word: | ||
| 34 | return 32; | ||
| 35 | case Register::Size::Long: | ||
| 36 | return 64; | ||
| 37 | } | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
| 41 | constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, | ||
| 42 | Register::Size dst_size, | ||
| 43 | bool src_signed, | ||
| 44 | bool dst_signed) { | ||
| 45 | const u32 dst_bits = SizeInBits(dst_size); | ||
| 46 | if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { | ||
| 47 | if (src_signed == dst_signed) { | ||
| 48 | return std::nullopt; | ||
| 49 | } | ||
| 50 | return std::make_pair(0, std::numeric_limits<s32>::max()); | ||
| 51 | } | ||
| 52 | if (dst_signed) { | ||
| 53 | // Signed destination, clamp to [-128, 127] for instance | ||
| 54 | return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); | ||
| 55 | } else { | ||
| 56 | // Unsigned destination | ||
| 57 | if (dst_bits == 32) { | ||
| 58 | // Avoid shifting by 32, that is undefined behavior | ||
| 59 | return std::make_pair(0, s32(std::numeric_limits<u32>::max())); | ||
| 60 | } | ||
| 61 | return std::make_pair(0, (1 << dst_bits) - 1); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 21 | } // Anonymous namespace | 65 | } // Anonymous namespace |
| 22 | 66 | ||
| 23 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | 67 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { |
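A few concrete bounds, read off IntegerSaturateBounds above: a signed 8-bit destination clamps to [-128, 127]; an unsigned 16-bit destination to [0, 65535]; an unsigned word narrowing into a signed word to [0, INT32_MAX]; and word-to-word with matching signedness returns nullopt (no clamp). The unsigned branch, including its shift-by-32 guard, can be re-derived standalone:

    #include <cstdint>
    #include <utility>

    // Clamp range for an unsigned destination of dst_bits width, mirroring
    // the logic above (illustration only; assumes 1 <= dst_bits <= 32).
    constexpr std::pair<std::int32_t, std::int32_t> UnsignedBounds(std::uint32_t dst_bits) {
        if (dst_bits == 32) {
            // 1 << 32 would be undefined behavior, hence the special case.
            return {0, static_cast<std::int32_t>(0xFFFFFFFF)};
        }
        return {0, (1 << dst_bits) - 1};
    }
    static_assert(UnsignedBounds(8).second == 255);
    static_assert(UnsignedBounds(16).second == 65535);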
| @@ -28,14 +72,13 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 28 | case OpCode::Id::I2I_R: | 72 | case OpCode::Id::I2I_R: |
| 29 | case OpCode::Id::I2I_C: | 73 | case OpCode::Id::I2I_C: |
| 30 | case OpCode::Id::I2I_IMM: { | 74 | case OpCode::Id::I2I_IMM: { |
| 31 | UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0); | 75 | const bool src_signed = instr.conversion.is_input_signed; |
| 32 | UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | 76 | const bool dst_signed = instr.conversion.is_output_signed; |
| 33 | UNIMPLEMENTED_IF(instr.alu.saturate_d); | 77 | const Register::Size src_size = instr.conversion.src_size; |
| 78 | const Register::Size dst_size = instr.conversion.dst_size; | ||
| 79 | const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); | ||
| 34 | 80 | ||
| 35 | const bool input_signed = instr.conversion.is_input_signed; | 81 | Node value = [this, instr, opcode] { |
| 36 | const bool output_signed = instr.conversion.is_output_signed; | ||
| 37 | |||
| 38 | Node value = [&]() { | ||
| 39 | switch (opcode->get().GetId()) { | 82 | switch (opcode->get().GetId()) { |
| 40 | case OpCode::Id::I2I_R: | 83 | case OpCode::Id::I2I_R: |
| 41 | return GetRegister(instr.gpr20); | 84 | return GetRegister(instr.gpr20); |
| @@ -48,16 +91,60 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
| 48 | return Immediate(0); | 91 | return Immediate(0); |
| 49 | } | 92 | } |
| 50 | }(); | 93 | }(); |
| 51 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | ||
| 52 | 94 | ||
| 53 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a, | 95 | // Ensure the source selector is valid |
| 54 | input_signed); | 96 | switch (instr.conversion.src_size) { |
| 55 | if (input_signed != output_signed) { | 97 | case Register::Size::Byte: |
| 56 | value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); | 98 | break; |
| 99 | case Register::Size::Short: | ||
| 100 | ASSERT(selector == 0 || selector == 2); | ||
| 101 | break; | ||
| 102 | default: | ||
| 103 | ASSERT(selector == 0); | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | |||
| 107 | if (src_size != Register::Size::Word || selector != 0) { | ||
| 108 | value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), | ||
| 109 | Immediate(selector * 8), Immediate(SizeInBits(src_size))); | ||
| 110 | } | ||
| 111 | |||
| 112 | value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, | ||
| 113 | instr.conversion.negate_a, src_signed); | ||
| 114 | |||
| 115 | if (instr.alu.saturate_d) { | ||
| 116 | if (src_signed && !dst_signed) { | ||
| 117 | Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, | ||
| 118 | Immediate(1 << (SizeInBits(src_size) - 1))); | ||
| 119 | value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), | ||
| 120 | std::move(value)); | ||
| 121 | |||
| 122 | // Simplify generated expressions; this can be removed without semantic impact ||
| 123 | SetTemporary(bb, 0, std::move(value)); | ||
| 124 | value = GetTemporary(0); | ||
| 125 | |||
| 126 | if (dst_size != Register::Size::Word) { | ||
| 127 | const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 128 | Node is_large = | ||
| 129 | Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); | ||
| 130 | value = Operation(OperationCode::Select, std::move(is_large), limit, | ||
| 131 | std::move(value)); | ||
| 132 | } | ||
| 133 | } else if (const std::optional bounds = | ||
| 134 | IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { | ||
| 135 | value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), | ||
| 136 | Immediate(bounds->first)); | ||
| 137 | value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), | ||
| 138 | Immediate(bounds->second)); | ||
| 139 | } | ||
| 140 | } else if (dst_size != Register::Size::Word) { | ||
| 141 | // No saturation, we only have to mask the result | ||
| 142 | Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 143 | value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); | ||
| 57 | } | 144 | } |
| 58 | 145 | ||
| 59 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | 146 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); |
| 60 | SetRegister(bb, instr.gpr0, value); | 147 | SetRegister(bb, instr.gpr0, std::move(value)); |
| 61 | break; | 148 | break; |
| 62 | } | 149 | } |
| 63 | case OpCode::Id::I2F_R: | 150 | case OpCode::Id::I2F_R: |
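The signed-to-unsigned saturate branch in the I2I case above works in two selects: zero out negative inputs (detected as unsigned values at or above 2^(src_bits-1)), then clamp to the destination maximum when the destination is narrower than a word. A scalar model of that path, illustration only:

    #include <cstdint>

    // Mirrors the is_negative / is_large selects above for a s32 source.
    constexpr std::uint32_t SaturateSignedToUnsigned(std::int32_t value,
                                                     std::uint32_t dst_bits) {
        if (value < 0) {
            return 0; // the is_negative select
        }
        const std::uint32_t limit =
            dst_bits == 32 ? 0xFFFFFFFFu : (1u << dst_bits) - 1;
        const std::uint32_t unsigned_value = static_cast<std::uint32_t>(value);
        return unsigned_value > limit ? limit : unsigned_value; // the is_large select
    }
    static_assert(SaturateSignedToUnsigned(-5, 8) == 0);
    static_assert(SaturateSignedToUnsigned(300, 8) == 255);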
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index d2fe4ec5d..0dd7a1196 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -13,13 +13,247 @@ | |||
| 13 | #include "video_core/engines/shader_bytecode.h" | 13 | #include "video_core/engines/shader_bytecode.h" |
| 14 | #include "video_core/shader/node_helper.h" | 14 | #include "video_core/shader/node_helper.h" |
| 15 | #include "video_core/shader/shader_ir.h" | 15 | #include "video_core/shader/shader_ir.h" |
| 16 | #include "video_core/textures/texture.h" | ||
| 16 | 17 | ||
| 17 | namespace VideoCommon::Shader { | 18 | namespace VideoCommon::Shader { |
| 18 | 19 | ||
| 19 | using Tegra::Shader::Instruction; | 20 | using Tegra::Shader::Instruction; |
| 20 | using Tegra::Shader::OpCode; | 21 | using Tegra::Shader::OpCode; |
| 22 | using Tegra::Shader::PredCondition; | ||
| 23 | using Tegra::Shader::StoreType; | ||
| 24 | using Tegra::Texture::ComponentType; | ||
| 25 | using Tegra::Texture::TextureFormat; | ||
| 26 | using Tegra::Texture::TICEntry; | ||
| 21 | 27 | ||
| 22 | namespace { | 28 | namespace { |
| 29 | |||
| 30 | ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, | ||
| 31 | std::size_t component) { | ||
| 32 | const TextureFormat format{descriptor.format}; | ||
| 33 | switch (format) { | ||
| 34 | case TextureFormat::R16_G16_B16_A16: | ||
| 35 | case TextureFormat::R32_G32_B32_A32: | ||
| 36 | case TextureFormat::R32_G32_B32: | ||
| 37 | case TextureFormat::R32_G32: | ||
| 38 | case TextureFormat::R16_G16: | ||
| 39 | case TextureFormat::R32: | ||
| 40 | case TextureFormat::R16: | ||
| 41 | case TextureFormat::R8: | ||
| 42 | case TextureFormat::R1: | ||
| 43 | if (component == 0) { | ||
| 44 | return descriptor.r_type; | ||
| 45 | } | ||
| 46 | if (component == 1) { | ||
| 47 | return descriptor.g_type; | ||
| 48 | } | ||
| 49 | if (component == 2) { | ||
| 50 | return descriptor.b_type; | ||
| 51 | } | ||
| 52 | if (component == 3) { | ||
| 53 | return descriptor.a_type; | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | case TextureFormat::A8R8G8B8: | ||
| 57 | if (component == 0) { | ||
| 58 | return descriptor.a_type; | ||
| 59 | } | ||
| 60 | if (component == 1) { | ||
| 61 | return descriptor.r_type; | ||
| 62 | } | ||
| 63 | if (component == 2) { | ||
| 64 | return descriptor.g_type; | ||
| 65 | } | ||
| 66 | if (component == 3) { | ||
| 67 | return descriptor.b_type; | ||
| 68 | } | ||
| 69 | break; | ||
| 70 | case TextureFormat::A2B10G10R10: | ||
| 71 | case TextureFormat::A4B4G4R4: | ||
| 72 | case TextureFormat::A5B5G5R1: | ||
| 73 | case TextureFormat::A1B5G5R5: | ||
| 74 | if (component == 0) { | ||
| 75 | return descriptor.a_type; | ||
| 76 | } | ||
| 77 | if (component == 1) { | ||
| 78 | return descriptor.b_type; | ||
| 79 | } | ||
| 80 | if (component == 2) { | ||
| 81 | return descriptor.g_type; | ||
| 82 | } | ||
| 83 | if (component == 3) { | ||
| 84 | return descriptor.r_type; | ||
| 85 | } | ||
| 86 | break; | ||
| 87 | case TextureFormat::R32_B24G8: | ||
| 88 | if (component == 0) { | ||
| 89 | return descriptor.r_type; | ||
| 90 | } | ||
| 91 | if (component == 1) { | ||
| 92 | return descriptor.b_type; | ||
| 93 | } | ||
| 94 | if (component == 2) { | ||
| 95 | return descriptor.g_type; | ||
| 96 | } | ||
| 97 | break; | ||
| 98 | case TextureFormat::B5G6R5: | ||
| 99 | case TextureFormat::B6G5R5: | ||
| 100 | if (component == 0) { | ||
| 101 | return descriptor.b_type; | ||
| 102 | } | ||
| 103 | if (component == 1) { | ||
| 104 | return descriptor.g_type; | ||
| 105 | } | ||
| 106 | if (component == 2) { | ||
| 107 | return descriptor.r_type; | ||
| 108 | } | ||
| 109 | break; | ||
| 110 | case TextureFormat::G8R24: | ||
| 111 | case TextureFormat::G24R8: | ||
| 112 | case TextureFormat::G8R8: | ||
| 113 | case TextureFormat::G4R4: | ||
| 114 | if (component == 0) { | ||
| 115 | return descriptor.g_type; | ||
| 116 | } | ||
| 117 | if (component == 1) { | ||
| 118 | return descriptor.r_type; | ||
| 119 | } | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | UNIMPLEMENTED_MSG("texture format not implemented={}", format); ||
| 123 | return ComponentType::FLOAT; | ||
| 124 | } | ||
| 125 | |||
| 126 | bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { | ||
| 127 | constexpr u8 R = 0b0001; | ||
| 128 | constexpr u8 G = 0b0010; | ||
| 129 | constexpr u8 B = 0b0100; | ||
| 130 | constexpr u8 A = 0b1000; | ||
| 131 | constexpr std::array<u8, 16> mask = { | ||
| 132 | 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), | ||
| 133 | (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; | ||
| 134 | return std::bitset<4>{mask.at(component_mask)}.test(component); | ||
| 135 | } | ||
| 136 | |||
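One observation on the table in IsComponentEnabled: it is the identity mapping (entry i equals i for all sixteen indices, e.g. R|A = 9 at index 9), so it documents the R/G/B/A composition rather than transforming the mask. The same test as a direct bit probe:

    #include <cstddef>

    // Equivalent to the table lookup above because mask[i] == i throughout.
    constexpr bool IsComponentEnabledDirect(std::size_t component_mask,
                                            std::size_t component) {
        return ((component_mask >> component) & 1) != 0;
    }
    static_assert(IsComponentEnabledDirect(0b0110, 1));  // G enabled
    static_assert(!IsComponentEnabledDirect(0b0110, 3)); // A disabled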
| 137 | u32 GetComponentSize(TextureFormat format, std::size_t component) { | ||
| 138 | switch (format) { | ||
| 139 | case TextureFormat::R32_G32_B32_A32: | ||
| 140 | return 32; | ||
| 141 | case TextureFormat::R16_G16_B16_A16: | ||
| 142 | return 16; | ||
| 143 | case TextureFormat::R32_G32_B32: | ||
| 144 | return component <= 2 ? 32 : 0; | ||
| 145 | case TextureFormat::R32_G32: | ||
| 146 | return component <= 1 ? 32 : 0; | ||
| 147 | case TextureFormat::R16_G16: | ||
| 148 | return component <= 1 ? 16 : 0; | ||
| 149 | case TextureFormat::R32: | ||
| 150 | return component == 0 ? 32 : 0; | ||
| 151 | case TextureFormat::R16: | ||
| 152 | return component == 0 ? 16 : 0; | ||
| 153 | case TextureFormat::R8: | ||
| 154 | return component == 0 ? 8 : 0; | ||
| 155 | case TextureFormat::R1: | ||
| 156 | return component == 0 ? 1 : 0; | ||
| 157 | case TextureFormat::A8R8G8B8: | ||
| 158 | return 8; | ||
| 159 | case TextureFormat::A2B10G10R10: | ||
| 160 | return (component == 3 || component == 2 || component == 1) ? 10 : 2; | ||
| 161 | case TextureFormat::A4B4G4R4: | ||
| 162 | return 4; | ||
| 163 | case TextureFormat::A5B5G5R1: | ||
| 164 | return (component == 0 || component == 1 || component == 2) ? 5 : 1; | ||
| 165 | case TextureFormat::A1B5G5R5: | ||
| 166 | return (component == 1 || component == 2 || component == 3) ? 5 : 1; | ||
| 167 | case TextureFormat::R32_B24G8: | ||
| 168 | if (component == 0) { | ||
| 169 | return 32; | ||
| 170 | } | ||
| 171 | if (component == 1) { | ||
| 172 | return 24; | ||
| 173 | } | ||
| 174 | if (component == 2) { | ||
| 175 | return 8; | ||
| 176 | } | ||
| 177 | return 0; | ||
| 178 | case TextureFormat::B5G6R5: | ||
| 179 | if (component == 0 || component == 2) { | ||
| 180 | return 5; | ||
| 181 | } | ||
| 182 | if (component == 1) { | ||
| 183 | return 6; | ||
| 184 | } | ||
| 185 | return 0; | ||
| 186 | case TextureFormat::B6G5R5: | ||
| 187 | if (component == 1 || component == 2) { | ||
| 188 | return 5; | ||
| 189 | } | ||
| 190 | if (component == 0) { | ||
| 191 | return 6; | ||
| 192 | } | ||
| 193 | return 0; | ||
| 194 | case TextureFormat::G8R24: | ||
| 195 | if (component == 0) { | ||
| 196 | return 8; | ||
| 197 | } | ||
| 198 | if (component == 1) { | ||
| 199 | return 24; | ||
| 200 | } | ||
| 201 | return 0; | ||
| 202 | case TextureFormat::G24R8: | ||
| 203 | if (component == 0) { | ||
| 204 | return 24; ||
| 205 | } | ||
| 206 | if (component == 1) { | ||
| 207 | return 8; ||
| 208 | } | ||
| 209 | return 0; | ||
| 210 | case TextureFormat::G8R8: | ||
| 211 | return (component == 0 || component == 1) ? 8 : 0; | ||
| 212 | case TextureFormat::G4R4: | ||
| 213 | return (component == 0 || component == 1) ? 4 : 0; | ||
| 214 | default: | ||
| 215 | UNIMPLEMENTED_MSG("texture format not implemented={}", format); ||
| 216 | return 0; | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
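A quick consistency check on GetComponentSize above: the per-component widths of a packed format sum to its pixel size, e.g. A2B10G10R10 gives 2 + 10 + 10 + 10 = 32 bits and B5G6R5 gives 5 + 6 + 5 = 16 bits. Sketch assuming the definitions from this hunk:

    // Sums the per-component bit widths of a format; illustrative only.
    u32 TotalFormatBits(TextureFormat format) {
        u32 total = 0;
        for (std::size_t component = 0; component < 4; ++component) {
            total += GetComponentSize(format, component);
        }
        return total; // 32 for A2B10G10R10, 16 for B5G6R5
    }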
| 220 | std::size_t GetImageComponentMask(TextureFormat format) { | ||
| 221 | constexpr u8 R = 0b0001; | ||
| 222 | constexpr u8 G = 0b0010; | ||
| 223 | constexpr u8 B = 0b0100; | ||
| 224 | constexpr u8 A = 0b1000; | ||
| 225 | switch (format) { | ||
| 226 | case TextureFormat::R32_G32_B32_A32: | ||
| 227 | case TextureFormat::R16_G16_B16_A16: | ||
| 228 | case TextureFormat::A8R8G8B8: | ||
| 229 | case TextureFormat::A2B10G10R10: | ||
| 230 | case TextureFormat::A4B4G4R4: | ||
| 231 | case TextureFormat::A5B5G5R1: | ||
| 232 | case TextureFormat::A1B5G5R5: | ||
| 233 | return std::size_t{R | G | B | A}; | ||
| 234 | case TextureFormat::R32_G32_B32: | ||
| 235 | case TextureFormat::R32_B24G8: | ||
| 236 | case TextureFormat::B5G6R5: | ||
| 237 | case TextureFormat::B6G5R5: | ||
| 238 | return std::size_t{R | G | B}; | ||
| 239 | case TextureFormat::R32_G32: | ||
| 240 | case TextureFormat::R16_G16: | ||
| 241 | case TextureFormat::G8R24: | ||
| 242 | case TextureFormat::G24R8: | ||
| 243 | case TextureFormat::G8R8: | ||
| 244 | case TextureFormat::G4R4: | ||
| 245 | return std::size_t{R | G}; | ||
| 246 | case TextureFormat::R32: | ||
| 247 | case TextureFormat::R16: | ||
| 248 | case TextureFormat::R8: | ||
| 249 | case TextureFormat::R1: | ||
| 250 | return std::size_t{R}; | ||
| 251 | default: | ||
| 252 | UNIMPLEMENTED_MSG("Unimplemented texture format={}", format); | ||
| 253 | return std::size_t{R | G | B | A}; | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
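Together the two helpers let callers visit only the components a format actually stores; a usage sketch under the same definitions (hypothetical function name):

    // B5G6R5 reaches the loop body for components 0, 1 and 2 (R, G, B).
    void VisitStoredComponents(TextureFormat format) {
        const std::size_t mask = GetImageComponentMask(format);
        for (std::size_t component = 0; component < 4; ++component) {
            if (IsComponentEnabled(mask, component)) {
                // Per-component work goes here.
            }
        }
    }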
| 23 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | 257 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { |
| 24 | switch (image_type) { | 258 | switch (image_type) { |
| 25 | case Tegra::Shader::ImageType::Texture1D: | 259 | case Tegra::Shader::ImageType::Texture1D: |
| @@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | |||
| 37 | } | 271 | } |
| 38 | } // Anonymous namespace | 272 | } // Anonymous namespace |
| 39 | 273 | ||
| 274 | std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, | ||
| 275 | Node original_value) { | ||
| 276 | switch (component_type) { | ||
| 277 | case ComponentType::SNORM: { | ||
| 278 | // range [-1.0, 1.0] | ||
| 279 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 280 | Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); | ||
| 281 | cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); | ||
| 282 | return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; | ||
| 283 | } | ||
| 284 | case ComponentType::SINT: | ||
| 285 | case ComponentType::UNORM: { | ||
| 286 | bool is_signed = component_type == ComponentType::SINT; | ||
| 287 | // range [0.0, 1.0] | ||
| 288 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 289 | Immediate(static_cast<float>(1 << component_size) - 1.f)); | ||
| 290 | return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), | ||
| 291 | is_signed}; | ||
| 292 | } | ||
| 293 | case ComponentType::UINT: // range [0, (1 << component_size) - 1] | ||
| 294 | return {std::move(original_value), false}; | ||
| 295 | case ComponentType::FLOAT: | ||
| 296 | if (component_size == 16) { | ||
| 297 | return {Operation(OperationCode::HCastFloat, original_value), true}; | ||
| 298 | } else { | ||
| 299 | return {std::move(original_value), true}; | ||
| 300 | } | ||
| 301 | default: | ||
| 302 | UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); | ||
| 303 | return {std::move(original_value), true}; | ||
| 304 | } | ||
| 305 | } | ||
| 306 | |||
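The scaling GetComponentValue emits can be sanity-checked on the host: SNORM maps [-1.0, 1.0] onto [-(2^(n-1) - 1), 2^(n-1) - 1] and UNORM maps [0.0, 1.0] onto [0, 2^n - 1], so for n = 8 the factors are 127 and 255. The same arithmetic as the FMul immediates above (sketch, assumes n < 32):

    float SnormScale(unsigned n) {
        return static_cast<float>(1u << n) / 2.0f - 1.0f; // n = 8 -> 127.0f
    }
    float UnormScale(unsigned n) {
        return static_cast<float>(1u << n) - 1.0f; // n = 8 -> 255.0f
    }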
| 40 | u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | 307 | u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { |
| 41 | const Instruction instr = {program_code[pc]}; | 308 | const Instruction instr = {program_code[pc]}; |
| 42 | const auto opcode = OpCode::Decode(instr); | 309 | const auto opcode = OpCode::Decode(instr); |
| @@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 53 | 320 | ||
| 54 | switch (opcode->get().GetId()) { | 321 | switch (opcode->get().GetId()) { |
| 55 | case OpCode::Id::SULD: { | 322 | case OpCode::Id::SULD: { |
| 56 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 57 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | 323 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != |
| 58 | Tegra::Shader::OutOfBoundsStore::Ignore); | 324 | Tegra::Shader::OutOfBoundsStore::Ignore); |
| 59 | 325 | ||
| @@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 62 | : GetBindlessImage(instr.gpr39, type)}; | 328 | : GetBindlessImage(instr.gpr39, type)}; |
| 63 | image.MarkRead(); | 329 | image.MarkRead(); |
| 64 | 330 | ||
| 65 | u32 indexer = 0; | 331 | if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { |
| 66 | for (u32 element = 0; element < 4; ++element) { | 332 | u32 indexer = 0; |
| 67 | if (!instr.suldst.IsComponentEnabled(element)) { | 333 | for (u32 element = 0; element < 4; ++element) { |
| 68 | continue; | 334 | if (!instr.suldst.IsComponentEnabled(element)) { |
| 335 | continue; | ||
| 336 | } | ||
| 337 | MetaImage meta{image, {}, element}; | ||
| 338 | Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); | ||
| 339 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 340 | } | ||
| 341 | for (u32 i = 0; i < indexer; ++i) { | ||
| 342 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 343 | } | ||
| 344 | } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { | ||
| 345 | UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && | ||
| 346 | instr.suldst.GetStoreDataLayout() != StoreType::Bits64); | ||
| 347 | |||
| 348 | auto descriptor = [this, instr] { | ||
| 349 | std::optional<Tegra::Engines::SamplerDescriptor> descriptor; | ||
| 350 | if (instr.suldst.is_immediate) { | ||
| 351 | descriptor = | ||
| 352 | registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); | ||
| 353 | } else { | ||
| 354 | const Node image_register = GetRegister(instr.gpr39); | ||
| 355 | const auto [base_image, buffer, offset] = TrackCbuf( | ||
| 356 | image_register, global_code, static_cast<s64>(global_code.size())); | ||
| 357 | descriptor = registry.ObtainBindlessSampler(buffer, offset); | ||
| 358 | } | ||
| 359 | if (!descriptor) { | ||
| 360 | UNREACHABLE_MSG("Failed to obtain image descriptor"); | ||
| 361 | } | ||
| 362 | return *descriptor; | ||
| 363 | }(); | ||
| 364 | |||
| 365 | const auto comp_mask = GetImageComponentMask(descriptor.format); | ||
| 366 | |||
| 367 | switch (instr.suldst.GetStoreDataLayout()) { | ||
| 368 | case StoreType::Bits32: | ||
| 369 | case StoreType::Bits64: { | ||
| 370 | u32 indexer = 0; | ||
| 371 | u32 shifted_counter = 0; | ||
| 372 | Node value = Immediate(0); | ||
| 373 | for (u32 element = 0; element < 4; ++element) { | ||
| 374 | if (!IsComponentEnabled(comp_mask, element)) { | ||
| 375 | continue; | ||
| 376 | } | ||
| 377 | const auto component_type = GetComponentType(descriptor, element); | ||
| 378 | const auto component_size = GetComponentSize(descriptor.format, element); | ||
| 379 | MetaImage meta{image, {}, element}; | ||
| 380 | |||
| 381 | auto [converted_value, is_signed] = GetComponentValue( | ||
| 382 | component_type, component_size, | ||
| 383 | Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); | ||
| 384 | |||
| 385 | // Shift the element into its position within the packed word | ||
| 386 | const auto shifted = shifted_counter; | ||
| 387 | if (shifted > 0) { | ||
| 388 | converted_value = | ||
| 389 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, | ||
| 390 | std::move(converted_value), Immediate(shifted)); | ||
| 391 | } | ||
| 392 | shifted_counter += component_size; | ||
| 393 | |||
| 394 | // Merge the component value into the packed result | ||
| 395 | value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); | ||
| 396 | |||
| 397 | // Once a full 32-bit word has been packed, save it into a temporary | ||
| 398 | if (shifted_counter >= 32) { | ||
| 399 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 400 | // Reset the counter and value to start packing the next word | ||
| 401 | value = Immediate(0); | ||
| 402 | shifted_counter = 0; | ||
| 403 | } | ||
| 404 | } | ||
| 405 | for (u32 i = 0; i < indexer; ++i) { | ||
| 406 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 407 | } | ||
| 408 | break; | ||
| 409 | } | ||
| 410 | default: | ||
| 411 | UNREACHABLE(); | ||
| 412 | break; | ||
| 69 | } | 413 | } |
| 70 | MetaImage meta{image, {}, element}; | ||
| 71 | Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); | ||
| 72 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 73 | } | ||
| 74 | for (u32 i = 0; i < indexer; ++i) { | ||
| 75 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 76 | } | 414 | } |
| 77 | break; | 415 | break; |
| 78 | } | 416 | } |
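For intuition, the D_BA path above packs narrow components into 32-bit words by shifting and OR-ing, flushing a temporary each time 32 bits accumulate; a host-side sketch with two assumed 16-bit components:

    #include <cstdint>

    // Packs two 16-bit components the way the shift/OR loop above does.
    std::uint32_t PackTwo16BitComponents(std::uint32_t first, std::uint32_t second) {
        std::uint32_t value = 0;
        value |= first;        // shifted_counter == 0, no shift needed
        value |= second << 16; // shifted_counter == 16 after the first component
        return value;          // shifted_counter reaches 32 -> word is flushed
    }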
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 4944e9d69..d4f95b18c 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -11,12 +11,17 @@ | |||
| 11 | 11 | ||
| 12 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| 13 | 13 | ||
| 14 | using std::move; | ||
| 14 | using Tegra::Shader::ConditionCode; | 15 | using Tegra::Shader::ConditionCode; |
| 15 | using Tegra::Shader::Instruction; | 16 | using Tegra::Shader::Instruction; |
| 17 | using Tegra::Shader::IpaInterpMode; | ||
| 16 | using Tegra::Shader::OpCode; | 18 | using Tegra::Shader::OpCode; |
| 19 | using Tegra::Shader::PixelImap; | ||
| 17 | using Tegra::Shader::Register; | 20 | using Tegra::Shader::Register; |
| 18 | using Tegra::Shader::SystemVariable; | 21 | using Tegra::Shader::SystemVariable; |
| 19 | 22 | ||
| 23 | using Index = Tegra::Shader::Attribute::Index; | ||
| 24 | |||
| 20 | u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | 25 | u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { |
| 21 | const Instruction instr = {program_code[pc]}; | 26 | const Instruction instr = {program_code[pc]}; |
| 22 | const auto opcode = OpCode::Decode(instr); | 27 | const auto opcode = OpCode::Decode(instr); |
| @@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 66 | bb.push_back(Operation(OperationCode::Discard)); | 71 | bb.push_back(Operation(OperationCode::Discard)); |
| 67 | break; | 72 | break; |
| 68 | } | 73 | } |
| 69 | case OpCode::Id::MOV_SYS: { | 74 | case OpCode::Id::S2R: { |
| 70 | const Node value = [this, instr] { | 75 | const Node value = [this, instr] { |
| 71 | switch (instr.sys20) { | 76 | switch (instr.sys20) { |
| 72 | case SystemVariable::LaneId: | 77 | case SystemVariable::LaneId: |
| 73 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); | 78 | LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete"); |
| 74 | return Immediate(0U); | 79 | return Immediate(0U); |
| 75 | case SystemVariable::InvocationId: | 80 | case SystemVariable::InvocationId: |
| 76 | return Operation(OperationCode::InvocationId); | 81 | return Operation(OperationCode::InvocationId); |
| 77 | case SystemVariable::Ydirection: | 82 | case SystemVariable::Ydirection: |
| 78 | return Operation(OperationCode::YNegate); | 83 | return Operation(OperationCode::YNegate); |
| 79 | case SystemVariable::InvocationInfo: | 84 | case SystemVariable::InvocationInfo: |
| 80 | LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); | 85 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); |
| 86 | return Immediate(0U); | ||
| 87 | case SystemVariable::WscaleFactorXY: | ||
| 88 | UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); | ||
| 89 | return Immediate(0U); | ||
| 90 | case SystemVariable::WscaleFactorZ: | ||
| 91 | UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); | ||
| 81 | return Immediate(0U); | 92 | return Immediate(0U); |
| 82 | case SystemVariable::Tid: { | 93 | case SystemVariable::Tid: { |
| 83 | Node value = Immediate(0); | 94 | Node value = Immediate(0); |
| @@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 213 | } | 224 | } |
| 214 | case OpCode::Id::IPA: { | 225 | case OpCode::Id::IPA: { |
| 215 | const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; | 226 | const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; |
| 216 | |||
| 217 | const auto attribute = instr.attribute.fmt28; | 227 | const auto attribute = instr.attribute.fmt28; |
| 218 | const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), | 228 | const Index index = attribute.index; |
| 219 | instr.ipa.sample_mode.Value()}; | ||
| 220 | 229 | ||
| 221 | Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) | 230 | Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) |
| 222 | : GetInputAttribute(attribute.index, attribute.element); | 231 | : GetInputAttribute(index, attribute.element); |
| 223 | const Tegra::Shader::Attribute::Index index = attribute.index.Value(); | 232 | |
| 224 | const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && | 233 | // Code taken from Ryujinx. |
| 225 | index <= Tegra::Shader::Attribute::Index::Attribute_31; | 234 | if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { |
| 226 | if (is_generic || is_physical) { | 235 | const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); |
| 227 | // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. | 236 | if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { |
| 228 | // In theory by setting them as perspective, OpenGL does the perspective correction. | 237 | Node position_w = GetInputAttribute(Index::Position, 3); |
| 229 | // A way must figured to reverse the last step of it. | 238 | value = Operation(OperationCode::FMul, move(value), move(position_w)); |
| 230 | if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { | ||
| 231 | value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); | ||
| 232 | } | 239 | } |
| 233 | } | 240 | } |
| 234 | value = GetSaturatedFloat(value, instr.ipa.saturate); | ||
| 235 | 241 | ||
| 236 | SetRegister(bb, instr.gpr0, value); | 242 | if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { |
| 243 | value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); | ||
| 244 | } | ||
| 245 | |||
| 246 | value = GetSaturatedFloat(move(value), instr.ipa.saturate); | ||
| 247 | |||
| 248 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 237 | break; | 249 | break; |
| 238 | } | 250 | } |
| 239 | case OpCode::Id::OUT_R: { | 251 | case OpCode::Id::OUT_R: { |
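On the IPA change: when the pixel-shader header marks a generic attribute as Perspective, the hardware appears to deliver the attribute already divided by w, so multiplying by input Position element 3 (position.w) restores the plain value before any further IPA scaling. Conceptually, with host floats (illustrative only):

    // Recovers an attribute from its perspective-divided form.
    float UndoPerspectiveDivide(float attribute_over_w, float position_w) {
        return attribute_over_w * position_w;
    }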
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 48350e042..6c4a1358b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -780,20 +780,6 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 780 | // When lod is used always is in gpr20 | 780 | // When lod is used always is in gpr20 |
| 781 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | 781 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 782 | 782 | ||
| 783 | // Fill empty entries from the guest sampler | ||
| 784 | const std::size_t entry_coord_count = GetCoordCount(sampler.GetType()); | ||
| 785 | if (type_coord_count != entry_coord_count) { | ||
| 786 | LOG_WARNING(HW_GPU, "Bound and built texture types mismatch"); | ||
| 787 | |||
| 788 | // When the size is higher we insert zeroes | ||
| 789 | for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) { | ||
| 790 | coords.push_back(GetRegister(Register::ZeroIndex)); | ||
| 791 | } | ||
| 792 | |||
| 793 | // Then we ensure the size matches the number of entries (dropping unused values) | ||
| 794 | coords.resize(entry_coord_count); | ||
| 795 | } | ||
| 796 | |||
| 797 | Node4 values; | 783 | Node4 values; |
| 798 | for (u32 element = 0; element < values.size(); ++element) { | 784 | for (u32 element = 0; element < values.size(); ++element) { |
| 799 | auto coords_copy = coords; | 785 | auto coords_copy = coords; |
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index b047cf870..64ba60ea2 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp | |||
| @@ -10,16 +10,24 @@ | |||
| 10 | 10 | ||
| 11 | namespace VideoCommon::Shader { | 11 | namespace VideoCommon::Shader { |
| 12 | 12 | ||
| 13 | using std::move; | ||
| 13 | using Tegra::Shader::Instruction; | 14 | using Tegra::Shader::Instruction; |
| 14 | using Tegra::Shader::OpCode; | 15 | using Tegra::Shader::OpCode; |
| 15 | using Tegra::Shader::Pred; | 16 | using Tegra::Shader::Pred; |
| 16 | using Tegra::Shader::VideoType; | 17 | using Tegra::Shader::VideoType; |
| 17 | using Tegra::Shader::VmadShr; | 18 | using Tegra::Shader::VmadShr; |
| 19 | using Tegra::Shader::VmnmxOperation; | ||
| 20 | using Tegra::Shader::VmnmxType; | ||
| 18 | 21 | ||
| 19 | u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { | 22 | u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { |
| 20 | const Instruction instr = {program_code[pc]}; | 23 | const Instruction instr = {program_code[pc]}; |
| 21 | const auto opcode = OpCode::Decode(instr); | 24 | const auto opcode = OpCode::Decode(instr); |
| 22 | 25 | ||
| 26 | if (opcode->get().GetId() == OpCode::Id::VMNMX) { | ||
| 27 | DecodeVMNMX(bb, instr); | ||
| 28 | return pc; | ||
| 29 | } | ||
| 30 | |||
| 23 | const Node op_a = | 31 | const Node op_a = |
| 24 | GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, | 32 | GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, |
| 25 | instr.video.type_a, instr.video.byte_height_a); | 33 | instr.video.type_a, instr.video.byte_height_a); |
| @@ -109,4 +117,54 @@ Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, | |||
| 109 | } | 117 | } |
| 110 | } | 118 | } |
| 111 | 119 | ||
| 120 | void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { | ||
| 121 | UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); | ||
| 122 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); | ||
| 123 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); | ||
| 124 | UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); | ||
| 125 | UNIMPLEMENTED_IF(instr.vmnmx.sat); | ||
| 126 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 127 | |||
| 128 | Node op_a = GetRegister(instr.gpr8); | ||
| 129 | Node op_b = GetRegister(instr.gpr20); | ||
| 130 | Node op_c = GetRegister(instr.gpr39); | ||
| 131 | |||
| 132 | const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed | ||
| 133 | const bool is_oper2_signed = instr.vmnmx.is_dest_signed; | ||
| 134 | |||
| 135 | const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; | ||
| 136 | Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); | ||
| 137 | |||
| 138 | switch (instr.vmnmx.operation) { | ||
| 139 | case VmnmxOperation::Mrg_16H: | ||
| 140 | value = BitfieldInsert(move(op_c), move(value), 16, 16); | ||
| 141 | break; | ||
| 142 | case VmnmxOperation::Mrg_16L: | ||
| 143 | value = BitfieldInsert(move(op_c), move(value), 0, 16); | ||
| 144 | break; | ||
| 145 | case VmnmxOperation::Mrg_8B0: | ||
| 146 | value = BitfieldInsert(move(op_c), move(value), 0, 8); | ||
| 147 | break; | ||
| 148 | case VmnmxOperation::Mrg_8B2: | ||
| 149 | value = BitfieldInsert(move(op_c), move(value), 16, 8); | ||
| 150 | break; | ||
| 151 | case VmnmxOperation::Acc: | ||
| 152 | value = Operation(OperationCode::IAdd, move(value), move(op_c)); | ||
| 153 | break; | ||
| 154 | case VmnmxOperation::Min: | ||
| 155 | value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); | ||
| 156 | break; | ||
| 157 | case VmnmxOperation::Max: | ||
| 158 | value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); | ||
| 159 | break; | ||
| 160 | case VmnmxOperation::Nop: | ||
| 161 | break; | ||
| 162 | default: | ||
| 163 | UNREACHABLE(); | ||
| 164 | break; | ||
| 165 | } | ||
| 166 | |||
| 167 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 168 | } | ||
| 169 | |||
| 112 | } // namespace VideoCommon::Shader | 170 | } // namespace VideoCommon::Shader |
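The Mrg_* cases in DecodeVMNMX splice the min/max result into a slice of op_c; on plain integers the insert looks like this (sketch, assumes bits < 32 as in the 8- and 16-bit merges above):

    #include <cstdint>

    // Inserts the low `bits` bits of `value` into `base` at `offset`.
    std::uint32_t BitfieldInsertSketch(std::uint32_t base, std::uint32_t value,
                                       std::uint32_t offset, std::uint32_t bits) {
        const std::uint32_t mask = ((1u << bits) - 1u) << offset;
        return (base & ~mask) | ((value << offset) & mask);
    }
    // Mrg_16H is BitfieldInsertSketch(op_c, value, 16, 16): op_c keeps its low half.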
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index baf7188d2..8852c8a1b 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { | |||
| 359 | switch (cc) { | 359 | switch (cc) { |
| 360 | case Tegra::Shader::ConditionCode::NEU: | 360 | case Tegra::Shader::ConditionCode::NEU: |
| 361 | return GetInternalFlag(InternalFlag::Zero, true); | 361 | return GetInternalFlag(InternalFlag::Zero, true); |
| 362 | case Tegra::Shader::ConditionCode::FCSM_TR: | ||
| 363 | UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); | ||
| 364 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); | ||
| 362 | default: | 365 | default: |
| 363 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); | 366 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); |
| 364 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); | 367 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 80fc9b82c..c6e7bdf50 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -312,6 +312,10 @@ private: | |||
| 312 | /// Conditionally saturates a half float pair | 312 | /// Conditionally saturates a half float pair |
| 313 | Node GetSaturatedHalfFloat(Node value, bool saturate = true); | 313 | Node GetSaturatedHalfFloat(Node value, bool saturate = true); |
| 314 | 314 | ||
| 315 | /// Get image component value by type and size | ||
| 316 | std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, | ||
| 317 | u32 component_size, Node original_value); | ||
| 318 | |||
| 315 | /// Returns a predicate comparing two floats | 319 | /// Returns a predicate comparing two floats |
| 316 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | 320 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); |
| 317 | /// Returns a predicate comparing two integers | 321 | /// Returns a predicate comparing two integers |
| @@ -350,6 +354,9 @@ private: | |||
| 350 | /// Marks the usage of a input or output attribute. | 354 | /// Marks the usage of a input or output attribute. |
| 351 | void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); | 355 | void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); |
| 352 | 356 | ||
| 357 | /// Decodes VMNMX instruction and inserts its code into the passed basic block. | ||
| 358 | void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); | ||
| 359 | |||
| 353 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | 360 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |
| 354 | const Node4& components); | 361 | const Node4& components); |
| 355 | 362 | ||
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index ae8817465..e0acd44d3 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -504,103 +504,6 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { | |||
| 504 | return GetFormatBpp(pixel_format) / CHAR_BIT; | 504 | return GetFormatBpp(pixel_format) / CHAR_BIT; |
| 505 | } | 505 | } |
| 506 | 506 | ||
| 507 | enum class SurfaceCompression { | ||
| 508 | None, // Not compressed | ||
| 509 | Compressed, // Texture is compressed | ||
| 510 | Converted, // Texture is converted before upload or after download | ||
| 511 | Rearranged, // Texture is swizzled before upload or after download | ||
| 512 | }; | ||
| 513 | |||
| 514 | constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{ | ||
| 515 | SurfaceCompression::None, // ABGR8U | ||
| 516 | SurfaceCompression::None, // ABGR8S | ||
| 517 | SurfaceCompression::None, // ABGR8UI | ||
| 518 | SurfaceCompression::None, // B5G6R5U | ||
| 519 | SurfaceCompression::None, // A2B10G10R10U | ||
| 520 | SurfaceCompression::None, // A1B5G5R5U | ||
| 521 | SurfaceCompression::None, // R8U | ||
| 522 | SurfaceCompression::None, // R8UI | ||
| 523 | SurfaceCompression::None, // RGBA16F | ||
| 524 | SurfaceCompression::None, // RGBA16U | ||
| 525 | SurfaceCompression::None, // RGBA16S | ||
| 526 | SurfaceCompression::None, // RGBA16UI | ||
| 527 | SurfaceCompression::None, // R11FG11FB10F | ||
| 528 | SurfaceCompression::None, // RGBA32UI | ||
| 529 | SurfaceCompression::Compressed, // DXT1 | ||
| 530 | SurfaceCompression::Compressed, // DXT23 | ||
| 531 | SurfaceCompression::Compressed, // DXT45 | ||
| 532 | SurfaceCompression::Compressed, // DXN1 | ||
| 533 | SurfaceCompression::Compressed, // DXN2UNORM | ||
| 534 | SurfaceCompression::Compressed, // DXN2SNORM | ||
| 535 | SurfaceCompression::Compressed, // BC7U | ||
| 536 | SurfaceCompression::Compressed, // BC6H_UF16 | ||
| 537 | SurfaceCompression::Compressed, // BC6H_SF16 | ||
| 538 | SurfaceCompression::Converted, // ASTC_2D_4X4 | ||
| 539 | SurfaceCompression::None, // BGRA8 | ||
| 540 | SurfaceCompression::None, // RGBA32F | ||
| 541 | SurfaceCompression::None, // RG32F | ||
| 542 | SurfaceCompression::None, // R32F | ||
| 543 | SurfaceCompression::None, // R16F | ||
| 544 | SurfaceCompression::None, // R16U | ||
| 545 | SurfaceCompression::None, // R16S | ||
| 546 | SurfaceCompression::None, // R16UI | ||
| 547 | SurfaceCompression::None, // R16I | ||
| 548 | SurfaceCompression::None, // RG16 | ||
| 549 | SurfaceCompression::None, // RG16F | ||
| 550 | SurfaceCompression::None, // RG16UI | ||
| 551 | SurfaceCompression::None, // RG16I | ||
| 552 | SurfaceCompression::None, // RG16S | ||
| 553 | SurfaceCompression::None, // RGB32F | ||
| 554 | SurfaceCompression::None, // RGBA8_SRGB | ||
| 555 | SurfaceCompression::None, // RG8U | ||
| 556 | SurfaceCompression::None, // RG8S | ||
| 557 | SurfaceCompression::None, // RG32UI | ||
| 558 | SurfaceCompression::None, // RGBX16F | ||
| 559 | SurfaceCompression::None, // R32UI | ||
| 560 | SurfaceCompression::None, // R32I | ||
| 561 | SurfaceCompression::Converted, // ASTC_2D_8X8 | ||
| 562 | SurfaceCompression::Converted, // ASTC_2D_8X5 | ||
| 563 | SurfaceCompression::Converted, // ASTC_2D_5X4 | ||
| 564 | SurfaceCompression::None, // BGRA8_SRGB | ||
| 565 | SurfaceCompression::Compressed, // DXT1_SRGB | ||
| 566 | SurfaceCompression::Compressed, // DXT23_SRGB | ||
| 567 | SurfaceCompression::Compressed, // DXT45_SRGB | ||
| 568 | SurfaceCompression::Compressed, // BC7U_SRGB | ||
| 569 | SurfaceCompression::None, // R4G4B4A4U | ||
| 570 | SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB | ||
| 571 | SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB | ||
| 572 | SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB | ||
| 573 | SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB | ||
| 574 | SurfaceCompression::Converted, // ASTC_2D_5X5 | ||
| 575 | SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB | ||
| 576 | SurfaceCompression::Converted, // ASTC_2D_10X8 | ||
| 577 | SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB | ||
| 578 | SurfaceCompression::Converted, // ASTC_2D_6X6 | ||
| 579 | SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB | ||
| 580 | SurfaceCompression::Converted, // ASTC_2D_10X10 | ||
| 581 | SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB | ||
| 582 | SurfaceCompression::Converted, // ASTC_2D_12X12 | ||
| 583 | SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB | ||
| 584 | SurfaceCompression::Converted, // ASTC_2D_8X6 | ||
| 585 | SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB | ||
| 586 | SurfaceCompression::Converted, // ASTC_2D_6X5 | ||
| 587 | SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB | ||
| 588 | SurfaceCompression::None, // E5B9G9R9F | ||
| 589 | SurfaceCompression::None, // Z32F | ||
| 590 | SurfaceCompression::None, // Z16 | ||
| 591 | SurfaceCompression::None, // Z24S8 | ||
| 592 | SurfaceCompression::Rearranged, // S8Z24 | ||
| 593 | SurfaceCompression::None, // Z32FS8 | ||
| 594 | }}; | ||
| 595 | |||
| 596 | constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { | ||
| 597 | if (format == PixelFormat::Invalid) { | ||
| 598 | return SurfaceCompression::None; | ||
| 599 | } | ||
| 600 | DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size()); | ||
| 601 | return compression_type_table[static_cast<std::size_t>(format)]; | ||
| 602 | } | ||
| 603 | |||
| 604 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); | 507 | SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); |
| 605 | 508 | ||
| 606 | bool SurfaceTargetIsLayered(SurfaceTarget target); | 509 | bool SurfaceTargetIsLayered(SurfaceTarget target); |
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 002df414f..7af0e792c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -18,15 +18,20 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, | |||
| 18 | 18 | ||
| 19 | using Tegra::Texture::ConvertFromGuestToHost; | 19 | using Tegra::Texture::ConvertFromGuestToHost; |
| 20 | using VideoCore::MortonSwizzleMode; | 20 | using VideoCore::MortonSwizzleMode; |
| 21 | using VideoCore::Surface::SurfaceCompression; | 21 | using VideoCore::Surface::IsPixelFormatASTC; |
| 22 | using VideoCore::Surface::PixelFormat; | ||
| 22 | 23 | ||
| 23 | StagingCache::StagingCache() = default; | 24 | StagingCache::StagingCache() = default; |
| 24 | 25 | ||
| 25 | StagingCache::~StagingCache() = default; | 26 | StagingCache::~StagingCache() = default; |
| 26 | 27 | ||
| 27 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) | 28 | SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, |
| 28 | : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, | 29 | bool is_astc_supported) |
| 29 | mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { | 30 | : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels), |
| 31 | mipmap_offsets(params.num_levels) { | ||
| 32 | is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported; | ||
| 33 | host_memory_size = params.GetHostSizeInBytes(is_converted); | ||
| 34 | |||
| 30 | std::size_t offset = 0; | 35 | std::size_t offset = 0; |
| 31 | for (u32 level = 0; level < params.num_levels; ++level) { | 36 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 32 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; | 37 | const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; |
| @@ -164,7 +169,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf | |||
| 164 | 169 | ||
| 165 | std::size_t guest_offset{mipmap_offsets[level]}; | 170 | std::size_t guest_offset{mipmap_offsets[level]}; |
| 166 | if (params.is_layered) { | 171 | if (params.is_layered) { |
| 167 | std::size_t host_offset{0}; | 172 | std::size_t host_offset = 0; |
| 168 | const std::size_t guest_stride = layer_size; | 173 | const std::size_t guest_stride = layer_size; |
| 169 | const std::size_t host_stride = params.GetHostLayerSize(level); | 174 | const std::size_t host_stride = params.GetHostLayerSize(level); |
| 170 | for (u32 layer = 0; layer < params.depth; ++layer) { | 175 | for (u32 layer = 0; layer < params.depth; ++layer) { |
| @@ -185,28 +190,17 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | |||
| 185 | MICROPROFILE_SCOPE(GPU_Load_Texture); | 190 | MICROPROFILE_SCOPE(GPU_Load_Texture); |
| 186 | auto& staging_buffer = staging_cache.GetBuffer(0); | 191 | auto& staging_buffer = staging_cache.GetBuffer(0); |
| 187 | u8* host_ptr; | 192 | u8* host_ptr; |
| 188 | is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); | 193 | // Use an extra temporary buffer |
| 189 | 194 | auto& tmp_buffer = staging_cache.GetBuffer(1); | |
| 190 | // Handle continuouty | 195 | tmp_buffer.resize(guest_memory_size); |
| 191 | if (is_continuous) { | 196 | host_ptr = tmp_buffer.data(); |
| 192 | // Use physical memory directly | 197 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); |
| 193 | host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 194 | if (!host_ptr) { | ||
| 195 | return; | ||
| 196 | } | ||
| 197 | } else { | ||
| 198 | // Use an extra temporal buffer | ||
| 199 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 200 | tmp_buffer.resize(guest_memory_size); | ||
| 201 | host_ptr = tmp_buffer.data(); | ||
| 202 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 203 | } | ||
| 204 | 198 | ||
| 205 | if (params.is_tiled) { | 199 | if (params.is_tiled) { |
| 206 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", | 200 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", |
| 207 | params.block_width, static_cast<u32>(params.target)); | 201 | params.block_width, static_cast<u32>(params.target)); |
| 208 | for (u32 level = 0; level < params.num_levels; ++level) { | 202 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 209 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; | 203 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; |
| 210 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, | 204 | SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, |
| 211 | staging_buffer.data() + host_offset, level); | 205 | staging_buffer.data() + host_offset, level); |
| 212 | } | 206 | } |
| @@ -219,7 +213,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | |||
| 219 | const u32 height{(params.height + block_height - 1) / block_height}; | 213 | const u32 height{(params.height + block_height - 1) / block_height}; |
| 220 | const u32 copy_size{width * bpp}; | 214 | const u32 copy_size{width * bpp}; |
| 221 | if (params.pitch == copy_size) { | 215 | if (params.pitch == copy_size) { |
| 222 | std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); | 216 | std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); |
| 223 | } else { | 217 | } else { |
| 224 | const u8* start{host_ptr}; | 218 | const u8* start{host_ptr}; |
| 225 | u8* write_to{staging_buffer.data()}; | 219 | u8* write_to{staging_buffer.data()}; |
| @@ -231,19 +225,15 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | |||
| 231 | } | 225 | } |
| 232 | } | 226 | } |
| 233 | 227 | ||
| 234 | auto compression_type = params.GetCompressionType(); | 228 | if (!is_converted && params.pixel_format != PixelFormat::S8Z24) { |
| 235 | if (compression_type == SurfaceCompression::None || | ||
| 236 | compression_type == SurfaceCompression::Compressed) | ||
| 237 | return; | 229 | return; |
| 230 | } | ||
| 238 | 231 | ||
| 239 | for (u32 level_up = params.num_levels; level_up > 0; --level_up) { | 232 | for (u32 level = params.num_levels; level--;) { |
| 240 | const u32 level = level_up - 1; | 233 | const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; |
| 241 | const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; | 234 | const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; |
| 242 | const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged | 235 | u8* const in_buffer = staging_buffer.data() + in_host_offset; |
| 243 | ? in_host_offset | 236 | u8* const out_buffer = staging_buffer.data() + out_host_offset; |
| 244 | : params.GetConvertedMipmapOffset(level); | ||
| 245 | u8* in_buffer = staging_buffer.data() + in_host_offset; | ||
| 246 | u8* out_buffer = staging_buffer.data() + out_host_offset; | ||
| 247 | ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, | 237 | ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, |
| 248 | params.GetMipWidth(level), params.GetMipHeight(level), | 238 | params.GetMipWidth(level), params.GetMipHeight(level), |
| 249 | params.GetMipDepth(level), true, true); | 239 | params.GetMipDepth(level), true, true); |
| @@ -256,24 +246,15 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | |||
| 256 | auto& staging_buffer = staging_cache.GetBuffer(0); | 246 | auto& staging_buffer = staging_cache.GetBuffer(0); |
| 257 | u8* host_ptr; | 247 | u8* host_ptr; |
| 258 | 248 | ||
| 259 | // Handle continuouty | 249 | // Use an extra temporary buffer |
| 260 | if (is_continuous) { | 250 | auto& tmp_buffer = staging_cache.GetBuffer(1); |
| 261 | // Use physical memory directly | 251 | tmp_buffer.resize(guest_memory_size); |
| 262 | host_ptr = memory_manager.GetPointer(gpu_addr); | 252 | host_ptr = tmp_buffer.data(); |
| 263 | if (!host_ptr) { | ||
| 264 | return; | ||
| 265 | } | ||
| 266 | } else { | ||
| 267 | // Use an extra temporal buffer | ||
| 268 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 269 | tmp_buffer.resize(guest_memory_size); | ||
| 270 | host_ptr = tmp_buffer.data(); | ||
| 271 | } | ||
| 272 | 253 | ||
| 273 | if (params.is_tiled) { | 254 | if (params.is_tiled) { |
| 274 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); | 255 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); |
| 275 | for (u32 level = 0; level < params.num_levels; ++level) { | 256 | for (u32 level = 0; level < params.num_levels; ++level) { |
| 276 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; | 257 | const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; |
| 277 | SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, | 258 | SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, |
| 278 | staging_buffer.data() + host_offset, level); | 259 | staging_buffer.data() + host_offset, level); |
| 279 | } | 260 | } |
| @@ -299,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | |||
| 299 | } | 280 | } |
| 300 | } | 281 | } |
| 301 | } | 282 | } |
| 302 | if (!is_continuous) { | 283 | memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); |
| 303 | memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 304 | } | ||
| 305 | } | 284 | } |
| 306 | 285 | ||
| 307 | } // namespace VideoCommon | 286 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5f79bb0aa..a39a8661b 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -68,8 +68,8 @@ public: | |||
| 68 | return gpu_addr; | 68 | return gpu_addr; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | bool Overlaps(const CacheAddr start, const CacheAddr end) const { | 71 | bool Overlaps(const VAddr start, const VAddr end) const { |
| 72 | return (cache_addr < end) && (cache_addr_end > start); | 72 | return (cpu_addr < end) && (cpu_addr_end > start); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { | 75 | bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { |
| @@ -86,21 +86,13 @@ public: | |||
| 86 | return cpu_addr; | 86 | return cpu_addr; |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | void SetCpuAddr(const VAddr new_addr) { | 89 | VAddr GetCpuAddrEnd() const { |
| 90 | cpu_addr = new_addr; | 90 | return cpu_addr_end; |
| 91 | } | ||
| 92 | |||
| 93 | CacheAddr GetCacheAddr() const { | ||
| 94 | return cache_addr; | ||
| 95 | } | 91 | } |
| 96 | 92 | ||
| 97 | CacheAddr GetCacheAddrEnd() const { | 93 | void SetCpuAddr(const VAddr new_addr) { |
| 98 | return cache_addr_end; | 94 | cpu_addr = new_addr; |
| 99 | } | 95 | cpu_addr_end = new_addr + guest_memory_size; |
| 100 | |||
| 101 | void SetCacheAddr(const CacheAddr new_addr) { | ||
| 102 | cache_addr = new_addr; | ||
| 103 | cache_addr_end = new_addr + guest_memory_size; | ||
| 104 | } | 96 | } |
| 105 | 97 | ||
| 106 | const SurfaceParams& GetSurfaceParams() const { | 98 | const SurfaceParams& GetSurfaceParams() const { |
| @@ -119,18 +111,14 @@ public: | |||
| 119 | return mipmap_sizes[level]; | 111 | return mipmap_sizes[level]; |
| 120 | } | 112 | } |
| 121 | 113 | ||
| 122 | void MarkAsContinuous(const bool is_continuous) { | ||
| 123 | this->is_continuous = is_continuous; | ||
| 124 | } | ||
| 125 | |||
| 126 | bool IsContinuous() const { | ||
| 127 | return is_continuous; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool IsLinear() const { | 114 | bool IsLinear() const { |
| 131 | return !params.is_tiled; | 115 | return !params.is_tiled; |
| 132 | } | 116 | } |
| 133 | 117 | ||
| 118 | bool IsConverted() const { | ||
| 119 | return is_converted; | ||
| 120 | } | ||
| 121 | |||
| 134 | bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { | 122 | bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { |
| 135 | return params.pixel_format == pixel_format; | 123 | return params.pixel_format == pixel_format; |
| 136 | } | 124 | } |
| @@ -160,7 +148,8 @@ public: | |||
| 160 | } | 148 | } |
| 161 | 149 | ||
| 162 | protected: | 150 | protected: |
| 163 | explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); | 151 | explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, |
| 152 | bool is_astc_supported); | ||
| 164 | ~SurfaceBaseImpl() = default; | 153 | ~SurfaceBaseImpl() = default; |
| 165 | 154 | ||
| 166 | virtual void DecorateSurfaceName() = 0; | 155 | virtual void DecorateSurfaceName() = 0; |
| @@ -168,12 +157,11 @@ protected: | |||
| 168 | const SurfaceParams params; | 157 | const SurfaceParams params; |
| 169 | std::size_t layer_size; | 158 | std::size_t layer_size; |
| 170 | std::size_t guest_memory_size; | 159 | std::size_t guest_memory_size; |
| 171 | const std::size_t host_memory_size; | 160 | std::size_t host_memory_size; |
| 172 | GPUVAddr gpu_addr{}; | 161 | GPUVAddr gpu_addr{}; |
| 173 | CacheAddr cache_addr{}; | ||
| 174 | CacheAddr cache_addr_end{}; | ||
| 175 | VAddr cpu_addr{}; | 162 | VAddr cpu_addr{}; |
| 176 | bool is_continuous{}; | 163 | VAddr cpu_addr_end{}; |
| 164 | bool is_converted{}; | ||
| 177 | 165 | ||
| 178 | std::vector<std::size_t> mipmap_sizes; | 166 | std::vector<std::size_t> mipmap_sizes; |
| 179 | std::vector<std::size_t> mipmap_offsets; | 167 | std::vector<std::size_t> mipmap_offsets; |
| @@ -288,8 +276,9 @@ public: | |||
| 288 | } | 276 | } |
| 289 | 277 | ||
| 290 | protected: | 278 | protected: |
| 291 | explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params) | 279 | explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params, |
| 292 | : SurfaceBaseImpl(gpu_addr, params) {} | 280 | bool is_astc_supported) |
| 281 | : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {} | ||
| 293 | 282 | ||
| 294 | ~SurfaceBase() = default; | 283 | ~SurfaceBase() = default; |
| 295 | 284 | ||
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 9931c5ef7..6f3ef45be 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta | |||
| 113 | params.height = tic.Height(); | 113 | params.height = tic.Height(); |
| 114 | params.depth = tic.Depth(); | 114 | params.depth = tic.Depth(); |
| 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); | 115 | params.pitch = params.is_tiled ? 0 : tic.Pitch(); |
| 116 | if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { | 116 | if (params.target == SurfaceTarget::TextureCubemap || |
| 117 | params.depth = 1; | 117 | params.target == SurfaceTarget::TextureCubeArray) { |
| 118 | } else if (params.target == SurfaceTarget::TextureCubemap || | ||
| 119 | params.target == SurfaceTarget::TextureCubeArray) { | ||
| 120 | params.depth *= 6; | 118 | params.depth *= 6; |
| 121 | } | 119 | } |
| 122 | params.num_levels = tic.max_mip_level + 1; | 120 | params.num_levels = tic.max_mip_level + 1; |
| @@ -309,28 +307,26 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { | |||
| 309 | return offset; | 307 | return offset; |
| 310 | } | 308 | } |
| 311 | 309 | ||
| 312 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { | 310 | std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { |
| 313 | std::size_t offset = 0; | ||
| 314 | for (u32 i = 0; i < level; i++) { | ||
| 315 | offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); | ||
| 316 | } | ||
| 317 | return offset; | ||
| 318 | } | ||
| 319 | |||
| 320 | std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { | ||
| 321 | std::size_t offset = 0; | 311 | std::size_t offset = 0; |
| 322 | for (u32 i = 0; i < level; i++) { | 312 | if (is_converted) { |
| 323 | offset += GetConvertedMipmapSize(i); | 313 | for (u32 i = 0; i < level; ++i) { |
| 314 | offset += GetConvertedMipmapSize(i) * GetNumLayers(); | ||
| 315 | } | ||
| 316 | } else { | ||
| 317 | for (u32 i = 0; i < level; ++i) { | ||
| 318 | offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); | ||
| 319 | } | ||
| 324 | } | 320 | } |
| 325 | return offset; | 321 | return offset; |
| 326 | } | 322 | } |
| 327 | 323 | ||
| 328 | std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { | 324 | std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { |
| 329 | constexpr std::size_t rgba8_bpp = 4ULL; | 325 | constexpr std::size_t rgba8_bpp = 4ULL; |
| 330 | const std::size_t width_t = GetMipWidth(level); | 326 | const std::size_t mip_width = GetMipWidth(level); |
| 331 | const std::size_t height_t = GetMipHeight(level); | 327 | const std::size_t mip_height = GetMipHeight(level); |
| 332 | const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); | 328 | const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level); |
| 333 | return width_t * height_t * depth_t * rgba8_bpp; | 329 | return mip_width * mip_height * mip_depth * rgba8_bpp; |
| 334 | } | 330 | } |
| 335 | 331 | ||
| 336 | std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { | 332 | std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { |
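The reworked GetHostMipmapLevelOffset accumulates per-level sizes across all layers. For a converted 256x256 RGBA8 chain the first level sizes are 262144, 65536 and 16384 bytes (width * height * 4), so with two layers level 2 starts at (262144 + 65536) * 2 = 655360 bytes. A sketch with those hypothetical sizes:

    #include <cstddef>

    // Offset of `level` given fixed per-level converted sizes and a layer count.
    std::size_t HostMipOffsetSketch(unsigned level, std::size_t num_layers) {
        const std::size_t sizes[] = {262144, 65536, 16384}; // hypothetical chain
        std::size_t offset = 0;
        for (unsigned i = 0; i < level; ++i) {
            offset += sizes[i] * num_layers;
        }
        return offset; // level 2, two layers -> 655360
    }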
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 995cc3818..24957df8d 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h | |||
| @@ -20,8 +20,6 @@ namespace VideoCommon { | |||
| 20 | 20 | ||
| 21 | class FormatLookupTable; | 21 | class FormatLookupTable; |
| 22 | 22 | ||
| 23 | using VideoCore::Surface::SurfaceCompression; | ||
| 24 | |||
| 25 | class SurfaceParams { | 23 | class SurfaceParams { |
| 26 | public: | 24 | public: |
| 27 | /// Creates SurfaceCachedParams from a texture configuration. | 25 | /// Creates SurfaceCachedParams from a texture configuration. |
| @@ -67,16 +65,14 @@ public: | |||
| 67 | return GetInnerMemorySize(false, false, false); | 65 | return GetInnerMemorySize(false, false, false); |
| 68 | } | 66 | } |
| 69 | 67 | ||
| 70 | std::size_t GetHostSizeInBytes() const { | 68 | std::size_t GetHostSizeInBytes(bool is_converted) const { |
| 71 | std::size_t host_size_in_bytes; | 69 | if (!is_converted) { |
| 72 | if (GetCompressionType() == SurfaceCompression::Converted) { | 70 | return GetInnerMemorySize(true, false, false); |
| 73 | // ASTC is uncompressed in software, in emulated as RGBA8 | 71 | } |
| 74 | host_size_in_bytes = 0; | 72 | // ASTC is decompressed in software and emulated as RGBA8 |
| 75 | for (u32 level = 0; level < num_levels; ++level) { | 73 | std::size_t host_size_in_bytes = 0; |
| 76 | host_size_in_bytes += GetConvertedMipmapSize(level); | 74 | for (u32 level = 0; level < num_levels; ++level) { |
| 77 | } | 75 | host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); |
| 78 | } else { | ||
| 79 | host_size_in_bytes = GetInnerMemorySize(true, false, false); | ||
| 80 | } | 76 | } |
| 81 | return host_size_in_bytes; | 77 | return host_size_in_bytes; |
| 82 | } | 78 | } |
| @@ -107,9 +103,8 @@ public: | |||
| 107 | u32 GetMipBlockDepth(u32 level) const; | 103 | u32 GetMipBlockDepth(u32 level) const; |
| 108 | 104 | ||
| 109 | /// Returns the best possible row/pitch alignment for the surface. | 105 | /// Returns the best possible row/pitch alignment for the surface. |
| 110 | u32 GetRowAlignment(u32 level) const { | 106 | u32 GetRowAlignment(u32 level, bool is_converted) const { |
| 111 | const u32 bpp = | 107 | const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); |
| 112 | GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); | ||
| 113 | return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); | 108 | return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); |
| 114 | } | 109 | } |
| 115 | 110 | ||
| @@ -117,11 +112,7 @@ public: | |||
| 117 | std::size_t GetGuestMipmapLevelOffset(u32 level) const; | 112 | std::size_t GetGuestMipmapLevelOffset(u32 level) const; |
| 118 | 113 | ||
| 119 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level. | 114 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level. |
| 120 | std::size_t GetHostMipmapLevelOffset(u32 level) const; | 115 | std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; |
| 121 | |||
| 122 | /// Returns the offset in bytes in host memory (linear) of a given mipmap level | ||
| 123 | /// for a texture that is converted in host gpu. | ||
| 124 | std::size_t GetConvertedMipmapOffset(u32 level) const; | ||
| 125 | 116 | ||
| 126 | /// Returns the size in bytes in guest memory of a given mipmap level. | 117 | /// Returns the size in bytes in guest memory of a given mipmap level. |
| 127 | std::size_t GetGuestMipmapSize(u32 level) const { | 118 | std::size_t GetGuestMipmapSize(u32 level) const { |
| @@ -196,11 +187,6 @@ public: | |||
| 196 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; | 187 | pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; |
| 197 | } | 188 | } |
| 198 | 189 | ||
| 199 | /// Returns how the compression should be handled for this texture. | ||
| 200 | SurfaceCompression GetCompressionType() const { | ||
| 201 | return VideoCore::Surface::GetFormatCompressionType(pixel_format); | ||
| 202 | } | ||
| 203 | |||
| 204 | /// Returns true if the surface is a TextureBuffer type of surface. | 190 | /// Returns true if the surface is a TextureBuffer type of surface. |
| 205 | bool IsBuffer() const { | 191 | bool IsBuffer() const { |
| 206 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; | 192 | return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6cdbe63d0..4edd4313b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | |||
| 52 | 52 | ||
| 53 | template <typename TSurface, typename TView> | 53 | template <typename TSurface, typename TView> |
| 54 | class TextureCache { | 54 | class TextureCache { |
| 55 | using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; | ||
| 56 | using IntervalType = typename IntervalMap::interval_type; | ||
| 57 | 55 | ||
| 58 | public: | 56 | public: |
| 59 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | 57 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| 60 | std::lock_guard lock{mutex}; | 58 | std::lock_guard lock{mutex}; |
| 61 | 59 | ||
| 62 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | 60 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { |
| @@ -76,7 +74,7 @@ public: | |||
| 76 | guard_samplers = new_guard; | 74 | guard_samplers = new_guard; |
| 77 | } | 75 | } |
| 78 | 76 | ||
| 79 | void FlushRegion(CacheAddr addr, std::size_t size) { | 77 | void FlushRegion(VAddr addr, std::size_t size) { |
| 80 | std::lock_guard lock{mutex}; | 78 | std::lock_guard lock{mutex}; |
| 81 | 79 | ||
| 82 | auto surfaces = GetSurfacesInRegion(addr, size); | 80 | auto surfaces = GetSurfacesInRegion(addr, size); |
| @@ -99,9 +97,9 @@ public: | |||
| 99 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 97 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 100 | } | 98 | } |
| 101 | 99 | ||
| 102 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 100 | const std::optional<VAddr> cpu_addr = |
| 103 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 101 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 104 | if (!cache_addr) { | 102 | if (!cpu_addr) { |
| 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 103 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 106 | } | 104 | } |
| 107 | 105 | ||
| @@ -110,7 +108,7 @@ public: | |||
| 110 | } | 108 | } |
| 111 | 109 | ||
| 112 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 110 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; |
| 113 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | 111 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false); |
| 114 | if (guard_samplers) { | 112 | if (guard_samplers) { |
| 115 | sampled_textures.push_back(surface); | 113 | sampled_textures.push_back(surface); |
| 116 | } | 114 | } |
| @@ -124,13 +122,13 @@ public: | |||
| 124 | if (!gpu_addr) { | 122 | if (!gpu_addr) { |
| 125 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 123 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 126 | } | 124 | } |
| 127 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 125 | const std::optional<VAddr> cpu_addr = |
| 128 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 126 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 129 | if (!cache_addr) { | 127 | if (!cpu_addr) { |
| 130 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 128 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 131 | } | 129 | } |
| 132 | const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; | 130 | const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; |
| 133 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | 131 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, false); |
| 134 | if (guard_samplers) { | 132 | if (guard_samplers) { |
| 135 | sampled_textures.push_back(surface); | 133 | sampled_textures.push_back(surface); |
| 136 | } | 134 | } |
| @@ -145,7 +143,7 @@ public: | |||
| 145 | return any_rt; | 143 | return any_rt; |
| 146 | } | 144 | } |
| 147 | 145 | ||
| 148 | TView GetDepthBufferSurface(bool preserve_contents) { | 146 | TView GetDepthBufferSurface() { |
| 149 | std::lock_guard lock{mutex}; | 147 | std::lock_guard lock{mutex}; |
| 150 | auto& maxwell3d = system.GPU().Maxwell3D(); | 148 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| 151 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { | 149 | if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { |
| @@ -159,14 +157,14 @@ public: | |||
| 159 | SetEmptyDepthBuffer(); | 157 | SetEmptyDepthBuffer(); |
| 160 | return {}; | 158 | return {}; |
| 161 | } | 159 | } |
| 162 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 160 | const std::optional<VAddr> cpu_addr = |
| 163 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 161 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 164 | if (!cache_addr) { | 162 | if (!cpu_addr) { |
| 165 | SetEmptyDepthBuffer(); | 163 | SetEmptyDepthBuffer(); |
| 166 | return {}; | 164 | return {}; |
| 167 | } | 165 | } |
| 168 | const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; | 166 | const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; |
| 169 | auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); | 167 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, true); |
| 170 | if (depth_buffer.target) | 168 | if (depth_buffer.target) |
| 171 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | 169 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 172 | depth_buffer.target = surface_view.first; | 170 | depth_buffer.target = surface_view.first; |
| @@ -176,7 +174,7 @@ public: | |||
| 176 | return surface_view.second; | 174 | return surface_view.second; |
| 177 | } | 175 | } |
| 178 | 176 | ||
| 179 | TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { | 177 | TView GetColorBufferSurface(std::size_t index) { |
| 180 | std::lock_guard lock{mutex}; | 178 | std::lock_guard lock{mutex}; |
| 181 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); | 179 | ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
| 182 | auto& maxwell3d = system.GPU().Maxwell3D(); | 180 | auto& maxwell3d = system.GPU().Maxwell3D(); |
| @@ -199,16 +197,15 @@ public: | |||
| 199 | return {}; | 197 | return {}; |
| 200 | } | 198 | } |
| 201 | 199 | ||
| 202 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 200 | const std::optional<VAddr> cpu_addr = |
| 203 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 201 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 204 | if (!cache_addr) { | 202 | if (!cpu_addr) { |
| 205 | SetEmptyColorBuffer(index); | 203 | SetEmptyColorBuffer(index); |
| 206 | return {}; | 204 | return {}; |
| 207 | } | 205 | } |
| 208 | 206 | ||
| 209 | auto surface_view = | 207 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, |
| 210 | GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), | 208 | SurfaceParams::CreateForFramebuffer(system, index), true); |
| 211 | preserve_contents, true); | ||
| 212 | if (render_targets[index].target) | 209 | if (render_targets[index].target) |
| 213 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | 210 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| 214 | render_targets[index].target = surface_view.first; | 211 | render_targets[index].target = surface_view.first; |
| @@ -257,27 +254,26 @@ public: | |||
| 257 | const GPUVAddr src_gpu_addr = src_config.Address(); | 254 | const GPUVAddr src_gpu_addr = src_config.Address(); |
| 258 | const GPUVAddr dst_gpu_addr = dst_config.Address(); | 255 | const GPUVAddr dst_gpu_addr = dst_config.Address(); |
| 259 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); | 256 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); |
| 260 | const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; | 257 | const std::optional<VAddr> dst_cpu_addr = |
| 261 | const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; | 258 | system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); |
| 262 | const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; | 259 | const std::optional<VAddr> src_cpu_addr = |
| 263 | const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; | 260 | system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); |
| 264 | std::pair<TSurface, TView> dst_surface = | 261 | std::pair<TSurface, TView> dst_surface = |
| 265 | GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); | 262 | GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, false); |
| 266 | std::pair<TSurface, TView> src_surface = | 263 | std::pair<TSurface, TView> src_surface = |
| 267 | GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); | 264 | GetSurface(src_gpu_addr, *src_cpu_addr, src_params, false); |
| 268 | ImageBlit(src_surface.second, dst_surface.second, copy_config); | 265 | ImageBlit(src_surface.second, dst_surface.second, copy_config); |
| 269 | dst_surface.first->MarkAsModified(true, Tick()); | 266 | dst_surface.first->MarkAsModified(true, Tick()); |
| 270 | } | 267 | } |
| 271 | 268 | ||
| 272 | TSurface TryFindFramebufferSurface(const u8* host_ptr) { | 269 | TSurface TryFindFramebufferSurface(VAddr addr) { |
| 273 | const CacheAddr cache_addr = ToCacheAddr(host_ptr); | 270 | if (!addr) { |
| 274 | if (!cache_addr) { | ||
| 275 | return nullptr; | 271 | return nullptr; |
| 276 | } | 272 | } |
| 277 | const CacheAddr page = cache_addr >> registry_page_bits; | 273 | const VAddr page = addr >> registry_page_bits; |
| 278 | std::vector<TSurface>& list = registry[page]; | 274 | std::vector<TSurface>& list = registry[page]; |
| 279 | for (auto& surface : list) { | 275 | for (auto& surface : list) { |
| 280 | if (surface->GetCacheAddr() == cache_addr) { | 276 | if (surface->GetCpuAddr() == addr) { |
| 281 | return surface; | 277 | return surface; |
| 282 | } | 278 | } |
| 283 | } | 279 | } |
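TryFindFramebufferSurface now keys directly on the guest address: the registry buckets surfaces by page, so a lookup shifts the address by registry_page_bits (defined near the end of this header) and scans a single bucket for an exact start-address match. A sketch under those assumptions (Surface is a stand-in for the cached surface type):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    using VAddr = std::uint64_t;
    constexpr std::uint64_t registry_page_bits = 20; // 1 MiB buckets, as below

    struct Surface {
        VAddr cpu_addr;
    };

    Surface* FindFramebufferSketch(std::unordered_map<VAddr, std::vector<Surface*>>& registry,
                                   VAddr addr) {
        if (addr == 0) {
            return nullptr;
        }
        for (Surface* surface : registry[addr >> registry_page_bits]) {
            if (surface->cpu_addr == addr) {
                return surface;
            }
        }
        return nullptr;
    }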
| @@ -289,8 +285,9 @@ public: | |||
| 289 | } | 285 | } |
| 290 | 286 | ||
| 291 | protected: | 287 | protected: |
| 292 | TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | 288 | explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 293 | : system{system}, rasterizer{rasterizer} { | 289 | bool is_astc_supported) |
| 290 | : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} { | ||
| 294 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { | 291 | for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { |
| 295 | SetEmptyColorBuffer(i); | 292 | SetEmptyColorBuffer(i); |
| 296 | } | 293 | } |
| @@ -337,18 +334,14 @@ protected: | |||
| 337 | 334 | ||
| 338 | void Register(TSurface surface) { | 335 | void Register(TSurface surface) { |
| 339 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 336 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 340 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | ||
| 341 | const std::size_t size = surface->GetSizeInBytes(); | 337 | const std::size_t size = surface->GetSizeInBytes(); |
| 342 | const std::optional<VAddr> cpu_addr = | 338 | const std::optional<VAddr> cpu_addr = |
| 343 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | 339 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 344 | if (!cache_ptr || !cpu_addr) { | 340 | if (!cpu_addr) { |
| 345 | LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", | 341 | LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", |
| 346 | gpu_addr); | 342 | gpu_addr); |
| 347 | return; | 343 | return; |
| 348 | } | 344 | } |
| 349 | const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); | ||
| 350 | surface->MarkAsContinuous(continuous); | ||
| 351 | surface->SetCacheAddr(cache_ptr); | ||
| 352 | surface->SetCpuAddr(*cpu_addr); | 345 | surface->SetCpuAddr(*cpu_addr); |
| 353 | RegisterInnerCache(surface); | 346 | RegisterInnerCache(surface); |
| 354 | surface->MarkAsRegistered(true); | 347 | surface->MarkAsRegistered(true); |
| @@ -381,6 +374,7 @@ protected: | |||
| 381 | } | 374 | } |
| 382 | 375 | ||
| 383 | Core::System& system; | 376 | Core::System& system; |
| 377 | const bool is_astc_supported; | ||
| 384 | 378 | ||
| 385 | private: | 379 | private: |
| 386 | enum class RecycleStrategy : u32 { | 380 | enum class RecycleStrategy : u32 { |
| @@ -456,22 +450,18 @@ private: | |||
| 456 | * @param overlaps The overlapping surfaces registered in the cache. | 450 | * @param overlaps The overlapping surfaces registered in the cache. |
| 457 | * @param params The parameters for the new surface. | 451 | * @param params The parameters for the new surface. |
| 458 | * @param gpu_addr The starting address of the new surface. | 452 | * @param gpu_addr The starting address of the new surface. |
| 459 | * @param preserve_contents Indicates that the new surface should be loaded from memory or left | ||
| 460 | * blank. | ||
| 461 | * @param untopological Indicates to the recycler that the texture has no way to match the | 453 | * @param untopological Indicates to the recycler that the texture has no way to match the |
| 462 | * overlaps due to topological reasons. | 454 | * overlaps due to topological reasons. |
| 463 | **/ | 455 | **/ |
| 464 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, | 456 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, |
| 465 | const SurfaceParams& params, const GPUVAddr gpu_addr, | 457 | const SurfaceParams& params, const GPUVAddr gpu_addr, |
| 466 | const bool preserve_contents, | ||
| 467 | const MatchTopologyResult untopological) { | 458 | const MatchTopologyResult untopological) { |
| 468 | const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation; | ||
| 469 | for (auto& surface : overlaps) { | 459 | for (auto& surface : overlaps) { |
| 470 | Unregister(surface); | 460 | Unregister(surface); |
| 471 | } | 461 | } |
| 472 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { | 462 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { |
| 473 | case RecycleStrategy::Ignore: { | 463 | case RecycleStrategy::Ignore: { |
| 474 | return InitializeSurface(gpu_addr, params, do_load); | 464 | return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation); |
| 475 | } | 465 | } |
| 476 | case RecycleStrategy::Flush: { | 466 | case RecycleStrategy::Flush: { |
| 477 | std::sort(overlaps.begin(), overlaps.end(), | 467 | std::sort(overlaps.begin(), overlaps.end(), |
| @@ -481,7 +471,7 @@ private: | |||
| 481 | for (auto& surface : overlaps) { | 471 | for (auto& surface : overlaps) { |
| 482 | FlushSurface(surface); | 472 | FlushSurface(surface); |
| 483 | } | 473 | } |
| 484 | return InitializeSurface(gpu_addr, params, preserve_contents); | 474 | return InitializeSurface(gpu_addr, params); |
| 485 | } | 475 | } |
| 486 | case RecycleStrategy::BufferCopy: { | 476 | case RecycleStrategy::BufferCopy: { |
| 487 | auto new_surface = GetUncachedSurface(gpu_addr, params); | 477 | auto new_surface = GetUncachedSurface(gpu_addr, params); |
| @@ -490,7 +480,7 @@ private: | |||
| 490 | } | 480 | } |
| 491 | default: { | 481 | default: { |
| 492 | UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); | 482 | UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!"); |
| 493 | return InitializeSurface(gpu_addr, params, do_load); | 483 | return InitializeSurface(gpu_addr, params); |
| 494 | } | 484 | } |
| 495 | } | 485 | } |
| 496 | } | 486 | } |
| @@ -519,7 +509,9 @@ private: | |||
| 519 | } | 509 | } |
| 520 | const auto& final_params = new_surface->GetSurfaceParams(); | 510 | const auto& final_params = new_surface->GetSurfaceParams(); |
| 521 | if (cr_params.type != final_params.type) { | 511 | if (cr_params.type != final_params.type) { |
| 522 | BufferCopy(current_surface, new_surface); | 512 | if (Settings::values.use_accurate_gpu_emulation) { |
| 513 | BufferCopy(current_surface, new_surface); | ||
| 514 | } | ||
| 523 | } else { | 515 | } else { |
| 524 | std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); | 516 | std::vector<CopyParams> bricks = current_surface->BreakDown(final_params); |
| 525 | for (auto& brick : bricks) { | 517 | for (auto& brick : bricks) { |
| @@ -626,14 +618,11 @@ private: | |||
| 626 | * @param params The parameters on the new surface. | 618 | * @param params The parameters on the new surface. |
| 627 | * @param gpu_addr The starting address of the new surface. | 619 | * @param gpu_addr The starting address of the new surface. |
| 628 | * @param cache_addr The starting address of the new surface on physical memory. | 620 | * @param cache_addr The starting address of the new surface on physical memory. |
| 629 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 630 | * left blank. | ||
| 631 | */ | 621 | */ |
| 632 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | 622 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, |
| 633 | const SurfaceParams& params, | 623 | const SurfaceParams& params, |
| 634 | const GPUVAddr gpu_addr, | 624 | const GPUVAddr gpu_addr, |
| 635 | const CacheAddr cache_addr, | 625 | const VAddr cpu_addr) { |
| 636 | bool preserve_contents) { | ||
| 637 | if (params.target == SurfaceTarget::Texture3D) { | 626 | if (params.target == SurfaceTarget::Texture3D) { |
| 638 | bool failed = false; | 627 | bool failed = false; |
| 639 | if (params.num_levels > 1) { | 628 | if (params.num_levels > 1) { |
| @@ -657,7 +646,7 @@ private: | |||
| 657 | failed = true; | 646 | failed = true; |
| 658 | break; | 647 | break; |
| 659 | } | 648 | } |
| 660 | const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); | 649 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); |
| 661 | const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); | 650 | const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); |
| 662 | modified |= surface->IsModified(); | 651 | modified |= surface->IsModified(); |
| 663 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, | 652 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, |
| @@ -677,23 +666,23 @@ private: | |||
| 677 | } else { | 666 | } else { |
| 678 | for (const auto& surface : overlaps) { | 667 | for (const auto& surface : overlaps) { |
| 679 | if (!surface->MatchTarget(params.target)) { | 668 | if (!surface->MatchTarget(params.target)) { |
| 680 | if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { | 669 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { |
| 681 | if (Settings::values.use_accurate_gpu_emulation) { | 670 | if (Settings::values.use_accurate_gpu_emulation) { |
| 682 | return std::nullopt; | 671 | return std::nullopt; |
| 683 | } | 672 | } |
| 684 | Unregister(surface); | 673 | Unregister(surface); |
| 685 | return InitializeSurface(gpu_addr, params, preserve_contents); | 674 | return InitializeSurface(gpu_addr, params); |
| 686 | } | 675 | } |
| 687 | return std::nullopt; | 676 | return std::nullopt; |
| 688 | } | 677 | } |
| 689 | if (surface->GetCacheAddr() != cache_addr) { | 678 | if (surface->GetCpuAddr() != cpu_addr) { |
| 690 | continue; | 679 | continue; |
| 691 | } | 680 | } |
| 692 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | 681 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { |
| 693 | return {{surface, surface->GetMainView()}}; | 682 | return {{surface, surface->GetMainView()}}; |
| 694 | } | 683 | } |
| 695 | } | 684 | } |
| 696 | return InitializeSurface(gpu_addr, params, preserve_contents); | 685 | return InitializeSurface(gpu_addr, params); |
| 697 | } | 686 | } |
| 698 | } | 687 | } |
| 699 | 688 | ||
| @@ -716,23 +705,19 @@ private: | |||
| 716 | * | 705 | * |
| 717 | * @param gpu_addr The starting address of the candidate surface. | 706 | * @param gpu_addr The starting address of the candidate surface. |
| 718 | * @param params The parameters on the candidate surface. | 707 | * @param params The parameters on the candidate surface. |
| 719 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | ||
| 720 | * left blank. | ||
| 721 | * @param is_render Whether or not the surface is a render target. | 708 | * @param is_render Whether or not the surface is a render target. |
| 722 | **/ | 709 | **/ |
| 723 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, | 710 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, |
| 724 | const SurfaceParams& params, bool preserve_contents, | 711 | const SurfaceParams& params, bool is_render) { |
| 725 | bool is_render) { | ||
| 726 | // Step 1 | 712 | // Step 1 |
| 727 | // Check the Level 1 cache for a fast structural match. If the candidate surface | 713 | // Check the Level 1 cache for a fast structural match. If the candidate surface |
| 728 | // matches at a certain level, we are done. | 714 | // matches at a certain level, we are done. |
| 729 | if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { | 715 | if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { |
| 730 | TSurface& current_surface = iter->second; | 716 | TSurface& current_surface = iter->second; |
| 731 | const auto topological_result = current_surface->MatchesTopology(params); | 717 | const auto topological_result = current_surface->MatchesTopology(params); |
| 732 | if (topological_result != MatchTopologyResult::FullMatch) { | 718 | if (topological_result != MatchTopologyResult::FullMatch) { |
| 733 | std::vector<TSurface> overlaps{current_surface}; | 719 | std::vector<TSurface> overlaps{current_surface}; |
| 734 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 720 | return RecycleSurface(overlaps, params, gpu_addr, topological_result); |
| 735 | topological_result); | ||
| 736 | } | 721 | } |
| 737 | 722 | ||
| 738 | const auto struct_result = current_surface->MatchesStructure(params); | 723 | const auto struct_result = current_surface->MatchesStructure(params); |
| @@ -753,11 +738,11 @@ private: | |||
| 753 | // Step 2 | 738 | // Step 2 |
| 754 | // Obtain all possible overlaps in the memory region | 739 | // Obtain all possible overlaps in the memory region |
| 755 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 740 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); |
| 756 | auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; | 741 | auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; |
| 757 | 742 | ||
| 758 | // If none are found, we are done. We just create the surface and load it. | 743 | // If none are found, we are done. We just create the surface and load it. |
| 759 | if (overlaps.empty()) { | 744 | if (overlaps.empty()) { |
| 760 | return InitializeSurface(gpu_addr, params, preserve_contents); | 745 | return InitializeSurface(gpu_addr, params); |
| 761 | } | 746 | } |
| 762 | 747 | ||
| 763 | // Step 3 | 748 | // Step 3 |
| @@ -767,15 +752,13 @@ private: | |||
| 767 | for (const auto& surface : overlaps) { | 752 | for (const auto& surface : overlaps) { |
| 768 | const auto topological_result = surface->MatchesTopology(params); | 753 | const auto topological_result = surface->MatchesTopology(params); |
| 769 | if (topological_result != MatchTopologyResult::FullMatch) { | 754 | if (topological_result != MatchTopologyResult::FullMatch) { |
| 770 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 755 | return RecycleSurface(overlaps, params, gpu_addr, topological_result); |
| 771 | topological_result); | ||
| 772 | } | 756 | } |
| 773 | } | 757 | } |
| 774 | 758 | ||
| 775 | // Check if it's a 3D texture | 759 | // Check if it's a 3D texture |
| 776 | if (params.block_depth > 0) { | 760 | if (params.block_depth > 0) { |
| 777 | auto surface = | 761 | auto surface = Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr); |
| 778 | Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); | ||
| 779 | if (surface) { | 762 | if (surface) { |
| 780 | return *surface; | 763 | return *surface; |
| 781 | } | 764 | } |
| @@ -795,8 +778,7 @@ private: | |||
| 795 | return *view; | 778 | return *view; |
| 796 | } | 779 | } |
| 797 | } | 780 | } |
| 798 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 781 | return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); |
| 799 | MatchTopologyResult::FullMatch); | ||
| 800 | } | 782 | } |
| 801 | // Now we check if the candidate is a mipmap/layer of the overlap | 783 | // Now we check if the candidate is a mipmap/layer of the overlap |
| 802 | std::optional<TView> view = | 784 | std::optional<TView> view = |
| @@ -820,7 +802,7 @@ private: | |||
| 820 | pair.first->EmplaceView(params, gpu_addr, candidate_size); | 802 | pair.first->EmplaceView(params, gpu_addr, candidate_size); |
| 821 | if (mirage_view) | 803 | if (mirage_view) |
| 822 | return {pair.first, *mirage_view}; | 804 | return {pair.first, *mirage_view}; |
| 823 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 805 | return RecycleSurface(overlaps, params, gpu_addr, |
| 824 | MatchTopologyResult::FullMatch); | 806 | MatchTopologyResult::FullMatch); |
| 825 | } | 807 | } |
| 826 | return {current_surface, *view}; | 808 | return {current_surface, *view}; |
| @@ -836,8 +818,7 @@ private: | |||
| 836 | } | 818 | } |
| 837 | } | 819 | } |
| 838 | // We failed all the tests, recycle the overlaps into a new texture. | 820 | // We failed all the tests, recycle the overlaps into a new texture. |
| 839 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 821 | return RecycleSurface(overlaps, params, gpu_addr, MatchTopologyResult::FullMatch); |
| 840 | MatchTopologyResult::FullMatch); | ||
| 841 | } | 822 | } |
| 842 | 823 | ||
| 843 | /** | 824 | /** |
| @@ -850,16 +831,16 @@ private: | |||
| 850 | * @param params The parameters on the candidate surface. | 831 | * @param params The parameters on the candidate surface. |
| 851 | **/ | 832 | **/ |
| 852 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | 833 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { |
| 853 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 834 | const std::optional<VAddr> cpu_addr = |
| 854 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 835 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 855 | 836 | ||
| 856 | if (!cache_addr) { | 837 | if (!cpu_addr) { |
| 857 | Deduction result{}; | 838 | Deduction result{}; |
| 858 | result.type = DeductionType::DeductionFailed; | 839 | result.type = DeductionType::DeductionFailed; |
| 859 | return result; | 840 | return result; |
| 860 | } | 841 | } |
| 861 | 842 | ||
| 862 | if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { | 843 | if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { |
| 863 | TSurface& current_surface = iter->second; | 844 | TSurface& current_surface = iter->second; |
| 864 | const auto topological_result = current_surface->MatchesTopology(params); | 845 | const auto topological_result = current_surface->MatchesTopology(params); |
| 865 | if (topological_result != MatchTopologyResult::FullMatch) { | 846 | if (topological_result != MatchTopologyResult::FullMatch) { |
| @@ -878,7 +859,7 @@ private: | |||
| 878 | } | 859 | } |
| 879 | 860 | ||
| 880 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 861 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); |
| 881 | auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; | 862 | auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; |
| 882 | 863 | ||
| 883 | if (overlaps.empty()) { | 864 | if (overlaps.empty()) { |
| 884 | Deduction result{}; | 865 | Deduction result{}; |
| @@ -995,10 +976,10 @@ private: | |||
| 995 | } | 976 | } |
| 996 | 977 | ||
| 997 | std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, | 978 | std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, |
| 998 | bool preserve_contents) { | 979 | bool do_load = true) { |
| 999 | auto new_surface{GetUncachedSurface(gpu_addr, params)}; | 980 | auto new_surface{GetUncachedSurface(gpu_addr, params)}; |
| 1000 | Register(new_surface); | 981 | Register(new_surface); |
| 1001 | if (preserve_contents) { | 982 | if (do_load) { |
| 1002 | LoadSurface(new_surface); | 983 | LoadSurface(new_surface); |
| 1003 | } | 984 | } |
| 1004 | return {new_surface, new_surface->GetMainView()}; | 985 | return {new_surface, new_surface->GetMainView()}; |
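The preserve_contents plumbing removed throughout this file collapses into InitializeSurface's do_load default: ordinary callers take the default and always load, while the recycle paths decide per strategy whether guest memory is worth reloading (only under accurate GPU emulation for the Ignore strategy). A condensed sketch of the resulting policy (names are illustrative):

    // do_load defaults to true; only the Ignore recycle path makes it conditional.
    bool ShouldLoadOnInitialize(bool recycled_via_ignore, bool use_accurate_gpu_emulation) {
        return !recycled_via_ignore || use_accurate_gpu_emulation;
    }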
| @@ -1022,10 +1003,10 @@ private: | |||
| 1022 | } | 1003 | } |
| 1023 | 1004 | ||
| 1024 | void RegisterInnerCache(TSurface& surface) { | 1005 | void RegisterInnerCache(TSurface& surface) { |
| 1025 | const CacheAddr cache_addr = surface->GetCacheAddr(); | 1006 | const VAddr cpu_addr = surface->GetCpuAddr(); |
| 1026 | CacheAddr start = cache_addr >> registry_page_bits; | 1007 | VAddr start = cpu_addr >> registry_page_bits; |
| 1027 | const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | 1008 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; |
| 1028 | l1_cache[cache_addr] = surface; | 1009 | l1_cache[cpu_addr] = surface; |
| 1029 | while (start <= end) { | 1010 | while (start <= end) { |
| 1030 | registry[start].push_back(surface); | 1011 | registry[start].push_back(surface); |
| 1031 | start++; | 1012 | start++; |
| @@ -1033,10 +1014,10 @@ private: | |||
| 1033 | } | 1014 | } |
| 1034 | 1015 | ||
| 1035 | void UnregisterInnerCache(TSurface& surface) { | 1016 | void UnregisterInnerCache(TSurface& surface) { |
| 1036 | const CacheAddr cache_addr = surface->GetCacheAddr(); | 1017 | const VAddr cpu_addr = surface->GetCpuAddr(); |
| 1037 | CacheAddr start = cache_addr >> registry_page_bits; | 1018 | VAddr start = cpu_addr >> registry_page_bits; |
| 1038 | const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | 1019 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; |
| 1039 | l1_cache.erase(cache_addr); | 1020 | l1_cache.erase(cpu_addr); |
| 1040 | while (start <= end) { | 1021 | while (start <= end) { |
| 1041 | auto& reg{registry[start]}; | 1022 | auto& reg{registry[start]}; |
| 1042 | reg.erase(std::find(reg.begin(), reg.end(), surface)); | 1023 | reg.erase(std::find(reg.begin(), reg.end(), surface)); |
| @@ -1044,18 +1025,18 @@ private: | |||
| 1044 | } | 1025 | } |
| 1045 | } | 1026 | } |
| 1046 | 1027 | ||
| 1047 | std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { | 1028 | std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { |
| 1048 | if (size == 0) { | 1029 | if (size == 0) { |
| 1049 | return {}; | 1030 | return {}; |
| 1050 | } | 1031 | } |
| 1051 | const CacheAddr cache_addr_end = cache_addr + size; | 1032 | const VAddr cpu_addr_end = cpu_addr + size; |
| 1052 | CacheAddr start = cache_addr >> registry_page_bits; | 1033 | VAddr start = cpu_addr >> registry_page_bits; |
| 1053 | const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; | 1034 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; |
| 1054 | std::vector<TSurface> surfaces; | 1035 | std::vector<TSurface> surfaces; |
| 1055 | while (start <= end) { | 1036 | while (start <= end) { |
| 1056 | std::vector<TSurface>& list = registry[start]; | 1037 | std::vector<TSurface>& list = registry[start]; |
| 1057 | for (auto& surface : list) { | 1038 | for (auto& surface : list) { |
| 1058 | if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { | 1039 | if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { |
| 1059 | surface->MarkAsPicked(true); | 1040 | surface->MarkAsPicked(true); |
| 1060 | surfaces.push_back(surface); | 1041 | surfaces.push_back(surface); |
| 1061 | } | 1042 | } |
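RegisterInnerCache pushes a surface into every bucket its byte range touches, and GetSurfacesInRegion walks exactly the buckets covering the queried range, using the picked flag to avoid returning a surface that spans several buckets twice. A self-contained sketch of the same walk (all names are illustrative stand-ins):

    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    using VAddr = std::uint64_t;
    constexpr std::uint64_t registry_page_bits = 20;

    struct Surface {
        VAddr begin;
        VAddr end; // one past the last byte
        bool picked = false;
        bool Overlaps(VAddr lo, VAddr hi) const {
            return begin < hi && lo < end;
        }
    };

    std::vector<Surface*> SurfacesInRegionSketch(
        std::unordered_map<VAddr, std::vector<Surface*>>& registry, VAddr cpu_addr,
        std::size_t size) {
        std::vector<Surface*> result;
        if (size == 0) {
            return result;
        }
        const VAddr cpu_addr_end = cpu_addr + size;
        VAddr page = cpu_addr >> registry_page_bits;
        const VAddr last_page = (cpu_addr_end - 1) >> registry_page_bits;
        for (; page <= last_page; ++page) {
            for (Surface* surface : registry[page]) {
                // The picked flag de-duplicates surfaces spanning several buckets.
                if (!surface->picked && surface->Overlaps(cpu_addr, cpu_addr_end)) {
                    surface->picked = true;
                    result.push_back(surface);
                }
            }
        }
        for (Surface* surface : result) {
            surface->picked = false;
        }
        return result;
    }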
| @@ -1144,14 +1125,14 @@ private: | |||
| 1144 | // large in size. | 1125 | // large in size. |
| 1145 | static constexpr u64 registry_page_bits{20}; | 1126 | static constexpr u64 registry_page_bits{20}; |
| 1146 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | 1127 | static constexpr u64 registry_page_size{1 << registry_page_bits}; |
| 1147 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | 1128 | std::unordered_map<VAddr, std::vector<TSurface>> registry; |
| 1148 | 1129 | ||
| 1149 | static constexpr u32 DEPTH_RT = 8; | 1130 | static constexpr u32 DEPTH_RT = 8; |
| 1150 | static constexpr u32 NO_RT = 0xFFFFFFFF; | 1131 | static constexpr u32 NO_RT = 0xFFFFFFFF; |
| 1151 | 1132 | ||
| 1152 | // The L1 cache is used for fast texture lookups before checking the overlaps. | 1133 | // The L1 cache is used for fast texture lookups before checking the overlaps. |
| 1153 | // This avoids calculating sizes and other properties. | 1134 | // This avoids calculating sizes and other properties. |
| 1154 | std::unordered_map<CacheAddr, TSurface> l1_cache; | 1135 | std::unordered_map<VAddr, TSurface> l1_cache; |
| 1155 | 1136 | ||
| 1156 | /// The surface reserve is a "backup" cache; this is where we put unique surfaces that have | 1137 | /// The surface reserve is a "backup" cache; this is where we put unique surfaces that have |
| 1157 | /// previously been used. This is to prevent surfaces from being constantly created and | 1138 | /// previously been used. This is to prevent surfaces from being constantly created and |
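The bucket granularity is fixed by the constants at the bottom of the class: with registry_page_bits at 20, each registry entry covers 1 MiB of guest address space, and the shifts in the snippets above map any address to its covering bucket. A quick check of the arithmetic:

    #include <cstdint>

    static_assert((std::uint64_t{1} << 20) == 1024 * 1024,
                  "20-bit registry pages are 1 MiB buckets");
    static_assert((0x12345678ULL >> 20) == 0x123, "addresses map to their covering bucket");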
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 062b4f252..365bde2f1 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -20,6 +20,8 @@ | |||
| 20 | #include <cstring> | 20 | #include <cstring> |
| 21 | #include <vector> | 21 | #include <vector> |
| 22 | 22 | ||
| 23 | #include <boost/container/static_vector.hpp> | ||
| 24 | |||
| 23 | #include "common/common_types.h" | 25 | #include "common/common_types.h" |
| 24 | 26 | ||
| 25 | #include "video_core/textures/astc.h" | 27 | #include "video_core/textures/astc.h" |
| @@ -39,25 +41,25 @@ constexpr u32 Popcnt(u32 n) { | |||
| 39 | 41 | ||
| 40 | class InputBitStream { | 42 | class InputBitStream { |
| 41 | public: | 43 | public: |
| 42 | explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) | 44 | constexpr explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) |
| 43 | : m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 45 | : cur_byte{ptr}, next_bit{start_offset % 8} {} |
| 44 | 46 | ||
| 45 | std::size_t GetBitsRead() const { | 47 | constexpr std::size_t GetBitsRead() const { |
| 46 | return m_BitsRead; | 48 | return bits_read; |
| 47 | } | 49 | } |
| 48 | 50 | ||
| 49 | u32 ReadBit() { | 51 | constexpr bool ReadBit() { |
| 50 | u32 bit = *m_CurByte >> m_NextBit++; | 52 | const bool bit = (*cur_byte >> next_bit++) & 1; |
| 51 | while (m_NextBit >= 8) { | 53 | while (next_bit >= 8) { |
| 52 | m_NextBit -= 8; | 54 | next_bit -= 8; |
| 53 | m_CurByte++; | 55 | cur_byte++; |
| 54 | } | 56 | } |
| 55 | 57 | ||
| 56 | m_BitsRead++; | 58 | bits_read++; |
| 57 | return bit & 1; | 59 | return bit; |
| 58 | } | 60 | } |
| 59 | 61 | ||
| 60 | u32 ReadBits(std::size_t nBits) { | 62 | constexpr u32 ReadBits(std::size_t nBits) { |
| 61 | u32 ret = 0; | 63 | u32 ret = 0; |
| 62 | for (std::size_t i = 0; i < nBits; ++i) { | 64 | for (std::size_t i = 0; i < nBits; ++i) { |
| 63 | ret |= (ReadBit() & 1) << i; | 65 | ret |= (ReadBit() & 1) << i; |
| @@ -66,7 +68,7 @@ public: | |||
| 66 | } | 68 | } |
| 67 | 69 | ||
| 68 | template <std::size_t nBits> | 70 | template <std::size_t nBits> |
| 69 | u32 ReadBits() { | 71 | constexpr u32 ReadBits() { |
| 70 | u32 ret = 0; | 72 | u32 ret = 0; |
| 71 | for (std::size_t i = 0; i < nBits; ++i) { | 73 | for (std::size_t i = 0; i < nBits; ++i) { |
| 72 | ret |= (ReadBit() & 1) << i; | 74 | ret |= (ReadBit() & 1) << i; |
| @@ -75,64 +77,58 @@ public: | |||
| 75 | } | 77 | } |
| 76 | 78 | ||
| 77 | private: | 79 | private: |
| 78 | const u8* m_CurByte; | 80 | const u8* cur_byte; |
| 79 | std::size_t m_NextBit = 0; | 81 | std::size_t next_bit = 0; |
| 80 | std::size_t m_BitsRead = 0; | 82 | std::size_t bits_read = 0; |
| 81 | }; | 83 | }; |
| 82 | 84 | ||
| 83 | class OutputBitStream { | 85 | class OutputBitStream { |
| 84 | public: | 86 | public: |
| 85 | explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) | 87 | constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) |
| 86 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 88 | : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} |
| 87 | |||
| 88 | ~OutputBitStream() = default; | ||
| 89 | 89 | ||
| 90 | s32 GetBitsWritten() const { | 90 | constexpr std::size_t GetBitsWritten() const { |
| 91 | return m_BitsWritten; | 91 | return bits_written; |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | void WriteBitsR(u32 val, u32 nBits) { | 94 | constexpr void WriteBitsR(u32 val, u32 nBits) { |
| 95 | for (u32 i = 0; i < nBits; i++) { | 95 | for (u32 i = 0; i < nBits; i++) { |
| 96 | WriteBit((val >> (nBits - i - 1)) & 1); | 96 | WriteBit((val >> (nBits - i - 1)) & 1); |
| 97 | } | 97 | } |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | void WriteBits(u32 val, u32 nBits) { | 100 | constexpr void WriteBits(u32 val, u32 nBits) { |
| 101 | for (u32 i = 0; i < nBits; i++) { | 101 | for (u32 i = 0; i < nBits; i++) { |
| 102 | WriteBit((val >> i) & 1); | 102 | WriteBit((val >> i) & 1); |
| 103 | } | 103 | } |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | private: | 106 | private: |
| 107 | void WriteBit(s32 b) { | 107 | constexpr void WriteBit(bool b) { |
| 108 | 108 | if (bits_written >= num_bits) { | |
| 109 | if (done) | ||
| 110 | return; | 109 | return; |
| 110 | } | ||
| 111 | 111 | ||
| 112 | const u32 mask = 1 << m_NextBit++; | 112 | const u32 mask = 1 << next_bit++; |
| 113 | 113 | ||
| 114 | // clear the bit | 114 | // clear the bit |
| 115 | *m_CurByte &= static_cast<u8>(~mask); | 115 | *cur_byte &= static_cast<u8>(~mask); |
| 116 | 116 | ||
| 117 | // Write the bit, if necessary | 117 | // Write the bit, if necessary |
| 118 | if (b) | 118 | if (b) |
| 119 | *m_CurByte |= static_cast<u8>(mask); | 119 | *cur_byte |= static_cast<u8>(mask); |
| 120 | 120 | ||
| 121 | // Next byte? | 121 | // Next byte? |
| 122 | if (m_NextBit >= 8) { | 122 | if (next_bit >= 8) { |
| 123 | m_CurByte += 1; | 123 | cur_byte += 1; |
| 124 | m_NextBit = 0; | 124 | next_bit = 0; |
| 125 | } | 125 | } |
| 126 | |||
| 127 | done = done || ++m_BitsWritten >= m_NumBits; | ||
| 128 | } | 126 | } |
| 129 | 127 | ||
| 130 | s32 m_BitsWritten = 0; | 128 | u8* cur_byte; |
| 131 | const s32 m_NumBits; | 129 | std::size_t num_bits; |
| 132 | u8* m_CurByte; | 130 | std::size_t bits_written = 0; |
| 133 | s32 m_NextBit = 0; | 131 | std::size_t next_bit = 0; |
| 134 | |||
| 135 | bool done = false; | ||
| 136 | }; | 132 | }; |
| 137 | 133 | ||
| 138 | template <typename IntType> | 134 | template <typename IntType> |
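Both bit streams share an LSB-first convention: ReadBits assembles the earliest bit read into the lowest position of the result, which the integer sequence decoding below relies on. A free-standing restatement with a worked value (ReadBitsLsbFirst is a local name for illustration, not the class above):

    #include <cstddef>
    #include <cstdint>

    constexpr std::uint32_t ReadBitsLsbFirst(const std::uint8_t* bytes, std::size_t start_bit,
                                             std::size_t count) {
        std::uint32_t result = 0;
        for (std::size_t i = 0; i < count; ++i) {
            const std::size_t bit = start_bit + i;
            result |= ((bytes[bit / 8] >> (bit % 8)) & 1u) << i;
        }
        return result;
    }

    // Reading 3 bits of 0b1011'0100 starting at bit 2 yields bits {1, 0, 1} -> 0b101.
    static_assert([] {
        constexpr std::uint8_t data[]{0b1011'0100};
        return ReadBitsLsbFirst(data, 2, 3);
    }() == 0b101);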
| @@ -195,9 +191,13 @@ struct IntegerEncodedValue { | |||
| 195 | u32 trit_value; | 191 | u32 trit_value; |
| 196 | }; | 192 | }; |
| 197 | }; | 193 | }; |
| 194 | using IntegerEncodedVector = boost::container::static_vector< | ||
| 195 | IntegerEncodedValue, 64, | ||
| 196 | boost::container::static_vector_options< | ||
| 197 | boost::container::inplace_alignment<alignof(IntegerEncodedValue)>, | ||
| 198 | boost::container::throw_on_overflow<false>>::type>; | ||
| 198 | 199 | ||
| 199 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 200 | static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { |
| 200 | u32 nBitsPerValue) { | ||
| 201 | // Implement the algorithm in section C.2.12 | 201 | // Implement the algorithm in section C.2.12 |
| 202 | u32 m[5]; | 202 | u32 m[5]; |
| 203 | u32 t[5]; | 203 | u32 t[5]; |
| @@ -255,7 +255,7 @@ static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValu | |||
| 255 | } | 255 | } |
| 256 | } | 256 | } |
| 257 | 257 | ||
| 258 | static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, | 258 | static void DecodeQus32Block(InputBitStream& bits, IntegerEncodedVector& result, |
| 259 | u32 nBitsPerValue) { | 259 | u32 nBitsPerValue) { |
| 260 | // Implement the algorithm in section C.2.12 | 260 | // Implement the algorithm in section C.2.12 |
| 261 | u32 m[3]; | 261 | u32 m[3]; |
| @@ -343,8 +343,8 @@ static constexpr std::array EncodingsValues = MakeEncodedValues(); | |||
| 343 | // Fills result with the values that are encoded in the given | 343 | // Fills result with the values that are encoded in the given |
| 344 | // bitstream. We must know beforehand what the maximum possible | 344 | // bitstream. We must know beforehand what the maximum possible |
| 345 | // value is, and how many values we're decoding. | 345 | // value is, and how many values we're decoding. |
| 346 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, | 346 | static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, |
| 347 | u32 maxRange, u32 nValues) { | 347 | u32 nValues) { |
| 348 | // Determine encoding parameters | 348 | // Determine encoding parameters |
| 349 | IntegerEncodedValue val = EncodingsValues[maxRange]; | 349 | IntegerEncodedValue val = EncodingsValues[maxRange]; |
| 350 | 350 | ||
| @@ -634,12 +634,14 @@ static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { | |||
| 634 | // Replicates the low numBits such that [(toBit - 1):(toBit - numBits)] | 634 | // Replicates the low numBits such that [(toBit - 1):(toBit - numBits)] |
| 635 | // is the same as [(numBits - 1):0] and repeats all the way down. | 635 | // is the same as [(numBits - 1):0] and repeats all the way down. |
| 636 | template <typename IntType> | 636 | template <typename IntType> |
| 637 | static IntType Replicate(IntType val, u32 numBits, u32 toBit) { | 637 | static constexpr IntType Replicate(IntType val, u32 numBits, u32 toBit) { |
| 638 | if (numBits == 0) | 638 | if (numBits == 0) { |
| 639 | return 0; | 639 | return 0; |
| 640 | if (toBit == 0) | 640 | } |
| 641 | if (toBit == 0) { | ||
| 641 | return 0; | 642 | return 0; |
| 642 | IntType v = val & static_cast<IntType>((1 << numBits) - 1); | 643 | } |
| 644 | const IntType v = val & static_cast<IntType>((1 << numBits) - 1); | ||
| 643 | IntType res = v; | 645 | IntType res = v; |
| 644 | u32 reslen = numBits; | 646 | u32 reslen = numBits; |
| 645 | while (reslen < toBit) { | 647 | while (reslen < toBit) { |
| @@ -656,6 +658,89 @@ static IntType Replicate(IntType val, u32 numBits, u32 toBit) { | |||
| 656 | return res; | 658 | return res; |
| 657 | } | 659 | } |
| 658 | 660 | ||
| 661 | static constexpr std::size_t NumReplicateEntries(u32 num_bits) { | ||
| 662 | return std::size_t(1) << num_bits; | ||
| 663 | } | ||
| 664 | |||
| 665 | template <typename IntType, u32 num_bits, u32 to_bit> | ||
| 666 | static constexpr auto MakeReplicateTable() { | ||
| 667 | std::array<IntType, NumReplicateEntries(num_bits)> table{}; | ||
| 668 | for (IntType value = 0; value < static_cast<IntType>(std::size(table)); ++value) { | ||
| 669 | table[value] = Replicate(value, num_bits, to_bit); | ||
| 670 | } | ||
| 671 | return table; | ||
| 672 | } | ||
| 673 | |||
| 674 | static constexpr auto REPLICATE_BYTE_TO_16_TABLE = MakeReplicateTable<u32, 8, 16>(); | ||
| 675 | static constexpr u32 ReplicateByteTo16(std::size_t value) { | ||
| 676 | return REPLICATE_BYTE_TO_16_TABLE[value]; | ||
| 677 | } | ||
| 678 | |||
| 679 | static constexpr auto REPLICATE_BIT_TO_7_TABLE = MakeReplicateTable<u32, 1, 7>(); | ||
| 680 | static constexpr u32 ReplicateBitTo7(std::size_t value) { | ||
| 681 | return REPLICATE_BIT_TO_7_TABLE[value]; | ||
| 682 | } | ||
| 683 | |||
| 684 | static constexpr auto REPLICATE_BIT_TO_9_TABLE = MakeReplicateTable<u32, 1, 9>(); | ||
| 685 | static constexpr u32 ReplicateBitTo9(std::size_t value) { | ||
| 686 | return REPLICATE_BIT_TO_9_TABLE[value]; | ||
| 687 | } | ||
| 688 | |||
| 689 | static constexpr auto REPLICATE_1_BIT_TO_8_TABLE = MakeReplicateTable<u32, 1, 8>(); | ||
| 690 | static constexpr auto REPLICATE_2_BIT_TO_8_TABLE = MakeReplicateTable<u32, 2, 8>(); | ||
| 691 | static constexpr auto REPLICATE_3_BIT_TO_8_TABLE = MakeReplicateTable<u32, 3, 8>(); | ||
| 692 | static constexpr auto REPLICATE_4_BIT_TO_8_TABLE = MakeReplicateTable<u32, 4, 8>(); | ||
| 693 | static constexpr auto REPLICATE_5_BIT_TO_8_TABLE = MakeReplicateTable<u32, 5, 8>(); | ||
| 694 | static constexpr auto REPLICATE_6_BIT_TO_8_TABLE = MakeReplicateTable<u32, 6, 8>(); | ||
| 695 | static constexpr auto REPLICATE_7_BIT_TO_8_TABLE = MakeReplicateTable<u32, 7, 8>(); | ||
| 696 | static constexpr auto REPLICATE_8_BIT_TO_8_TABLE = MakeReplicateTable<u32, 8, 8>(); | ||
| 697 | /// Use a precompiled table for the most common cases; if the value is outside the expected | ||
| 698 | /// range, fall back to the runtime implementation. | ||
| 699 | static constexpr u32 FastReplicateTo8(u32 value, u32 num_bits) { | ||
| 700 | switch (num_bits) { | ||
| 701 | case 1: | ||
| 702 | return REPLICATE_1_BIT_TO_8_TABLE[value]; | ||
| 703 | case 2: | ||
| 704 | return REPLICATE_2_BIT_TO_8_TABLE[value]; | ||
| 705 | case 3: | ||
| 706 | return REPLICATE_3_BIT_TO_8_TABLE[value]; | ||
| 707 | case 4: | ||
| 708 | return REPLICATE_4_BIT_TO_8_TABLE[value]; | ||
| 709 | case 5: | ||
| 710 | return REPLICATE_5_BIT_TO_8_TABLE[value]; | ||
| 711 | case 6: | ||
| 712 | return REPLICATE_6_BIT_TO_8_TABLE[value]; | ||
| 713 | case 7: | ||
| 714 | return REPLICATE_7_BIT_TO_8_TABLE[value]; | ||
| 715 | case 8: | ||
| 716 | return REPLICATE_8_BIT_TO_8_TABLE[value]; | ||
| 717 | default: | ||
| 718 | return Replicate(value, num_bits, 8); | ||
| 719 | } | ||
| 720 | } | ||
| 721 | |||
| 722 | static constexpr auto REPLICATE_1_BIT_TO_6_TABLE = MakeReplicateTable<u32, 1, 6>(); | ||
| 723 | static constexpr auto REPLICATE_2_BIT_TO_6_TABLE = MakeReplicateTable<u32, 2, 6>(); | ||
| 724 | static constexpr auto REPLICATE_3_BIT_TO_6_TABLE = MakeReplicateTable<u32, 3, 6>(); | ||
| 725 | static constexpr auto REPLICATE_4_BIT_TO_6_TABLE = MakeReplicateTable<u32, 4, 6>(); | ||
| 726 | static constexpr auto REPLICATE_5_BIT_TO_6_TABLE = MakeReplicateTable<u32, 5, 6>(); | ||
| 727 | static constexpr u32 FastReplicateTo6(u32 value, u32 num_bits) { | ||
| 728 | switch (num_bits) { | ||
| 729 | case 1: | ||
| 730 | return REPLICATE_1_BIT_TO_6_TABLE[value]; | ||
| 731 | case 2: | ||
| 732 | return REPLICATE_2_BIT_TO_6_TABLE[value]; | ||
| 733 | case 3: | ||
| 734 | return REPLICATE_3_BIT_TO_6_TABLE[value]; | ||
| 735 | case 4: | ||
| 736 | return REPLICATE_4_BIT_TO_6_TABLE[value]; | ||
| 737 | case 5: | ||
| 738 | return REPLICATE_5_BIT_TO_6_TABLE[value]; | ||
| 739 | default: | ||
| 740 | return Replicate(value, num_bits, 6); | ||
| 741 | } | ||
| 742 | } | ||
| 743 | |||
| 659 | class Pixel { | 744 | class Pixel { |
| 660 | protected: | 745 | protected: |
| 661 | using ChannelType = s16; | 746 | using ChannelType = s16; |
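The new tables simply memoize Replicate for the handful of widths the decoder touches, turning per-texel bit replication into an array index. The rule being cached repeats the source bits MSB-first until the target width is filled, truncating the last copy. A self-contained restatement with the values that explain ReplicateBitTo9 and ReplicateByteTo16 (ReplicateBits is a local name, not from the tree):

    #include <cstdint>

    constexpr std::uint32_t ReplicateBits(std::uint32_t value, unsigned num_bits,
                                          unsigned to_bit) {
        if (num_bits == 0 || to_bit == 0) {
            return 0;
        }
        const std::uint32_t pattern = value & ((1u << num_bits) - 1);
        std::uint32_t result = 0;
        unsigned written = 0;
        while (written < to_bit) {
            // Emit the pattern MSB-first, truncating the final copy if needed.
            const unsigned chunk = num_bits < to_bit - written ? num_bits : to_bit - written;
            result = (result << chunk) | (pattern >> (num_bits - chunk));
            written += chunk;
        }
        return result;
    }

    static_assert(ReplicateBits(0b101, 3, 8) == 0b1011'0110); // pattern 101 tiled to 8 bits
    static_assert(ReplicateBits(1, 1, 9) == 0x1FF);           // why 1-bit weights span the range
    static_assert(ReplicateBits(0xAB, 8, 16) == 0xABAB);      // what ReplicateByteTo16 tabulates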
| @@ -674,10 +759,10 @@ public: | |||
| 674 | // significant bits when going from larger to smaller bit depth | 759 | // significant bits when going from larger to smaller bit depth |
| 675 | // or by repeating the most significant bits when going from | 760 | // or by repeating the most significant bits when going from |
| 676 | // smaller to larger bit depths. | 761 | // smaller to larger bit depths. |
| 677 | void ChangeBitDepth(const u8 (&depth)[4]) { | 762 | void ChangeBitDepth() { |
| 678 | for (u32 i = 0; i < 4; i++) { | 763 | for (u32 i = 0; i < 4; i++) { |
| 679 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); | 764 | Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i]); |
| 680 | m_BitDepth[i] = depth[i]; | 765 | m_BitDepth[i] = 8; |
| 681 | } | 766 | } |
| 682 | } | 767 | } |
| 683 | 768 | ||
| @@ -689,28 +774,23 @@ public: | |||
| 689 | 774 | ||
| 690 | // Changes the bit depth of a single component. See the comment | 775 | // Changes the bit depth of a single component. See the comment |
| 691 | // above for how we do this. | 776 | // above for how we do this. |
| 692 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { | 777 | static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth) { |
| 693 | assert(newDepth <= 8); | ||
| 694 | assert(oldDepth <= 8); | 778 | assert(oldDepth <= 8); |
| 695 | 779 | ||
| 696 | if (oldDepth == newDepth) { | 780 | if (oldDepth == 8) { |
| 697 | // Do nothing | 781 | // Do nothing |
| 698 | return val; | 782 | return val; |
| 699 | } else if (oldDepth == 0 && newDepth != 0) { | 783 | } else if (oldDepth == 0) { |
| 700 | return static_cast<ChannelType>((1 << newDepth) - 1); | 784 | return static_cast<ChannelType>((1 << 8) - 1); |
| 701 | } else if (newDepth > oldDepth) { | 785 | } else if (8 > oldDepth) { |
| 702 | return Replicate(val, oldDepth, newDepth); | 786 | return static_cast<ChannelType>(FastReplicateTo8(static_cast<u32>(val), oldDepth)); |
| 703 | } else { | 787 | } else { |
| 704 | // oldDepth > newDepth | 788 | // oldDepth > newDepth |
| 705 | if (newDepth == 0) { | 789 | const u8 bitsWasted = static_cast<u8>(oldDepth - 8); |
| 706 | return 0xFF; | 790 | u16 v = static_cast<u16>(val); |
| 707 | } else { | 791 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); |
| 708 | u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); | 792 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << 8) - 1)); |
| 709 | u16 v = static_cast<u16>(val); | 793 | return static_cast<u8>(v); |
| 710 | v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); | ||
| 711 | v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); | ||
| 712 | return static_cast<u8>(v); | ||
| 713 | } | ||
| 714 | } | 794 | } |
| 715 | 795 | ||
| 716 | assert(false && "We shouldn't get here."); | 796 | assert(false && "We shouldn't get here."); |
| @@ -760,8 +840,7 @@ public: | |||
| 760 | // up in the most-significant byte. | 840 | // up in the most-significant byte. |
| 761 | u32 Pack() const { | 841 | u32 Pack() const { |
| 762 | Pixel eightBit(*this); | 842 | Pixel eightBit(*this); |
| 763 | const u8 eightBitDepth[4] = {8, 8, 8, 8}; | 843 | eightBit.ChangeBitDepth(); |
| 764 | eightBit.ChangeBitDepth(eightBitDepth); | ||
| 765 | 844 | ||
| 766 | u32 r = 0; | 845 | u32 r = 0; |
| 767 | r |= eightBit.A(); | 846 | r |= eightBit.A(); |
| @@ -816,8 +895,7 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP | |||
| 816 | } | 895 | } |
| 817 | 896 | ||
| 818 | // We now have enough to decode our integer sequence. | 897 | // We now have enough to decode our integer sequence. |
| 819 | std::vector<IntegerEncodedValue> decodedColorValues; | 898 | IntegerEncodedVector decodedColorValues; |
| 820 | decodedColorValues.reserve(32); | ||
| 821 | 899 | ||
| 822 | InputBitStream colorStream(data); | 900 | InputBitStream colorStream(data); |
| 823 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | 901 | DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
| @@ -839,12 +917,12 @@ static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nP | |||
| 839 | 917 | ||
| 840 | u32 A = 0, B = 0, C = 0, D = 0; | 918 | u32 A = 0, B = 0, C = 0, D = 0; |
| 841 | // A is just the lsb replicated 9 times. | 919 | // A is just the lsb replicated 9 times. |
| 842 | A = Replicate(bitval & 1, 1, 9); | 920 | A = ReplicateBitTo9(bitval & 1); |
| 843 | 921 | ||
| 844 | switch (val.encoding) { | 922 | switch (val.encoding) { |
| 845 | // Replicate bits | 923 | // Replicate bits |
| 846 | case IntegerEncoding::JustBits: | 924 | case IntegerEncoding::JustBits: |
| 847 | out[outIdx++] = Replicate(bitval, bitlen, 8); | 925 | out[outIdx++] = FastReplicateTo8(bitval, bitlen); |
| 848 | break; | 926 | break; |
| 849 | 927 | ||
| 850 | // Use algorithm in C.2.13 | 928 | // Use algorithm in C.2.13 |
| @@ -962,13 +1040,13 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 962 | u32 bitval = val.bit_value; | 1040 | u32 bitval = val.bit_value; |
| 963 | u32 bitlen = val.num_bits; | 1041 | u32 bitlen = val.num_bits; |
| 964 | 1042 | ||
| 965 | u32 A = Replicate(bitval & 1, 1, 7); | 1043 | u32 A = ReplicateBitTo7(bitval & 1); |
| 966 | u32 B = 0, C = 0, D = 0; | 1044 | u32 B = 0, C = 0, D = 0; |
| 967 | 1045 | ||
| 968 | u32 result = 0; | 1046 | u32 result = 0; |
| 969 | switch (val.encoding) { | 1047 | switch (val.encoding) { |
| 970 | case IntegerEncoding::JustBits: | 1048 | case IntegerEncoding::JustBits: |
| 971 | result = Replicate(bitval, bitlen, 6); | 1049 | result = FastReplicateTo6(bitval, bitlen); |
| 972 | break; | 1050 | break; |
| 973 | 1051 | ||
| 974 | case IntegerEncoding::Trit: { | 1052 | case IntegerEncoding::Trit: { |
| @@ -1047,7 +1125,7 @@ static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { | |||
| 1047 | return result; | 1125 | return result; |
| 1048 | } | 1126 | } |
| 1049 | 1127 | ||
| 1050 | static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, | 1128 | static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, |
| 1051 | const TexelWeightParams& params, const u32 blockWidth, | 1129 | const TexelWeightParams& params, const u32 blockWidth, |
| 1052 | const u32 blockHeight) { | 1130 | const u32 blockHeight) { |
| 1053 | u32 weightIdx = 0; | 1131 | u32 weightIdx = 0; |
| @@ -1545,8 +1623,7 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1545 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); | 1623 | static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); |
| 1546 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1624 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1547 | 1625 | ||
| 1548 | std::vector<IntegerEncodedValue> texelWeightValues; | 1626 | IntegerEncodedVector texelWeightValues; |
| 1549 | texelWeightValues.reserve(64); | ||
| 1550 | 1627 | ||
| 1551 | InputBitStream weightStream(texelWeightData); | 1628 | InputBitStream weightStream(texelWeightData); |
| 1552 | 1629 | ||
| @@ -1568,9 +1645,9 @@ static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 | |||
| 1568 | Pixel p; | 1645 | Pixel p; |
| 1569 | for (u32 c = 0; c < 4; c++) { | 1646 | for (u32 c = 0; c < 4; c++) { |
| 1570 | u32 C0 = endpos32s[partition][0].Component(c); | 1647 | u32 C0 = endpos32s[partition][0].Component(c); |
| 1571 | C0 = Replicate(C0, 8, 16); | 1648 | C0 = ReplicateByteTo16(C0); |
| 1572 | u32 C1 = endpos32s[partition][1].Component(c); | 1649 | u32 C1 = endpos32s[partition][1].Component(c); |
| 1573 | C1 = Replicate(C1, 8, 16); | 1650 | C1 = ReplicateByteTo16(C1); |
| 1574 | 1651 | ||
| 1575 | u32 plane = 0; | 1652 | u32 plane = 0; |
| 1576 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { | 1653 | if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { |
diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp new file mode 100644 index 000000000..d1939d744 --- /dev/null +++ b/src/video_core/textures/texture.cpp | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | |||
| 8 | #include "core/settings.h" | ||
| 9 | #include "video_core/textures/texture.h" | ||
| 10 | |||
| 11 | namespace Tegra::Texture { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | |||
| 15 | constexpr std::array<float, 256> SRGB_CONVERSION_LUT = { | ||
| 16 | 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f, | ||
| 17 | 0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f, | ||
| 18 | 0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f, | ||
| 19 | 0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f, | ||
| 20 | 0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f, | ||
| 21 | 0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f, | ||
| 22 | 0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f, | ||
| 23 | 0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f, | ||
| 24 | 0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f, | ||
| 25 | 0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f, | ||
| 26 | 0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f, | ||
| 27 | 0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f, | ||
| 28 | 0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f, | ||
| 29 | 0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f, | ||
| 30 | 0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f, | ||
| 31 | 0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f, | ||
| 32 | 0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f, | ||
| 33 | 0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f, | ||
| 34 | 0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f, | ||
| 35 | 0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f, | ||
| 36 | 0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f, | ||
| 37 | 0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f, | ||
| 38 | 0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f, | ||
| 39 | 0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f, | ||
| 40 | 0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f, | ||
| 41 | 0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f, | ||
| 42 | 0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f, | ||
| 43 | 0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f, | ||
| 44 | 0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f, | ||
| 45 | 0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f, | ||
| 46 | 0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f, | ||
| 47 | 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, | ||
| 48 | }; | ||
| 49 | |||
| 50 | unsigned SettingsMinimumAnisotropy() noexcept { | ||
| 51 | switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { | ||
| 52 | default: | ||
| 53 | case Anisotropy::Default: | ||
| 54 | return 1U; | ||
| 55 | case Anisotropy::Filter2x: | ||
| 56 | return 2U; | ||
| 57 | case Anisotropy::Filter4x: | ||
| 58 | return 4U; | ||
| 59 | case Anisotropy::Filter8x: | ||
| 60 | return 8U; | ||
| 61 | case Anisotropy::Filter16x: | ||
| 62 | return 16U; | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | } // Anonymous namespace | ||
| 67 | |||
| 68 | std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { | ||
| 69 | if (!srgb_conversion) { | ||
| 70 | return border_color; | ||
| 71 | } | ||
| 72 | return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g], | ||
| 73 | SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; | ||
| 74 | } | ||
| 75 | |||
| 76 | float TSCEntry::GetMaxAnisotropy() const noexcept { | ||
| 77 | return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace Tegra::Texture | ||
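GetBorderColor applies the lookup table only to the three color channels; alpha is stored linearly and passes through untouched. A usage sketch of the same selection logic (the lut parameter stands in for SRGB_CONVERSION_LUT above; the function name is illustrative):

    #include <array>
    #include <cstdint>

    std::array<float, 4> BorderColorSketch(bool srgb_conversion,
                                           const std::array<float, 256>& lut,
                                           const std::array<float, 4>& border_color,
                                           std::uint8_t r, std::uint8_t g, std::uint8_t b) {
        if (!srgb_conversion) {
            return border_color; // already linear, use as-is
        }
        // Only the color channels are sRGB-encoded; alpha passes through.
        return {lut[r], lut[g], lut[b], border_color[3]};
    }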
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 7edc4abe1..eba05aced 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 9 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "core/settings.h" | ||
| 12 | 11 | ||
| 13 | namespace Tegra::Texture { | 12 | namespace Tegra::Texture { |
| 14 | 13 | ||
| @@ -132,6 +131,20 @@ enum class SwizzleSource : u32 { | |||
| 132 | OneFloat = 7, | 131 | OneFloat = 7, |
| 133 | }; | 132 | }; |
| 134 | 133 | ||
| 134 | enum class MsaaMode : u32 { | ||
| 135 | Msaa1x1 = 0, | ||
| 136 | Msaa2x1 = 1, | ||
| 137 | Msaa2x2 = 2, | ||
| 138 | Msaa4x2 = 3, | ||
| 139 | Msaa4x2_D3D = 4, | ||
| 140 | Msaa2x1_D3D = 5, | ||
| 141 | Msaa4x4 = 6, | ||
| 142 | Msaa2x2_VC4 = 8, | ||
| 143 | Msaa2x2_VC12 = 9, | ||
| 144 | Msaa4x2_VC8 = 10, | ||
| 145 | Msaa4x2_VC24 = 11, | ||
| 146 | }; | ||
| 147 | |||
| 135 | union TextureHandle { | 148 | union TextureHandle { |
| 136 | TextureHandle(u32 raw) : raw{raw} {} | 149 | TextureHandle(u32 raw) : raw{raw} {} |
| 137 | 150 | ||
| @@ -198,6 +211,7 @@ struct TICEntry { | |||
| 198 | union { | 211 | union { |
| 199 | BitField<0, 4, u32> res_min_mip_level; | 212 | BitField<0, 4, u32> res_min_mip_level; |
| 200 | BitField<4, 4, u32> res_max_mip_level; | 213 | BitField<4, 4, u32> res_max_mip_level; |
| 214 | BitField<8, 4, MsaaMode> msaa_mode; | ||
| 201 | BitField<12, 12, u32> min_lod_clamp; | 215 | BitField<12, 12, u32> min_lod_clamp; |
| 202 | }; | 216 | }; |
| 203 | 217 | ||
| @@ -336,24 +350,9 @@ struct TSCEntry { | |||
| 336 | std::array<u8, 0x20> raw; | 350 | std::array<u8, 0x20> raw; |
| 337 | }; | 351 | }; |
| 338 | 352 | ||
| 339 | float GetMaxAnisotropy() const { | 353 | std::array<float, 4> GetBorderColor() const noexcept; |
| 340 | const u32 min_value = [] { | 354 | |
| 341 | switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { | 355 | float GetMaxAnisotropy() const noexcept; |
| 342 | default: | ||
| 343 | case Anisotropy::Default: | ||
| 344 | return 1U; | ||
| 345 | case Anisotropy::Filter2x: | ||
| 346 | return 2U; | ||
| 347 | case Anisotropy::Filter4x: | ||
| 348 | return 4U; | ||
| 349 | case Anisotropy::Filter8x: | ||
| 350 | return 8U; | ||
| 351 | case Anisotropy::Filter16x: | ||
| 352 | return 16U; | ||
| 353 | } | ||
| 354 | }(); | ||
| 355 | return static_cast<float>(std::max(1U << max_anisotropy, min_value)); | ||
| 356 | } | ||
| 357 | 356 | ||
| 358 | float GetMinLod() const { | 357 | float GetMinLod() const { |
| 359 | return static_cast<float>(min_lod_clamp) / 256.0f; | 358 | return static_cast<float>(min_lod_clamp) / 256.0f; |
| @@ -368,15 +367,6 @@ struct TSCEntry { | |||
| 368 | constexpr u32 mask = 1U << (13 - 1); | 367 | constexpr u32 mask = 1U << (13 - 1); |
| 369 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; | 368 | return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; |
| 370 | } | 369 | } |
| 371 | |||
| 372 | std::array<float, 4> GetBorderColor() const { | ||
| 373 | if (srgb_conversion) { | ||
| 374 | return {static_cast<float>(srgb_border_color_r) / 255.0f, | ||
| 375 | static_cast<float>(srgb_border_color_g) / 255.0f, | ||
| 376 | static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]}; | ||
| 377 | } | ||
| 378 | return border_color; | ||
| 379 | } | ||
| 380 | }; | 370 | }; |
| 381 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); | 371 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); |
| 382 | 372 | ||
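[Editor's note] texture.h now decodes bits 8-11 of the TIC word as an MsaaMode via BitField<8, 4, MsaaMode>. For readers unfamiliar with the helper, the extraction is equivalent to the following stand-alone sketch (names hypothetical):

    #include <cstdint>

    enum class MsaaMode : std::uint32_t {
        Msaa1x1 = 0,
        Msaa2x1 = 1,
        Msaa2x2 = 2,
        // ... remaining enumerators as declared above
    };

    // Equivalent of BitField<8, 4, MsaaMode>: shift the word right by the
    // field position (8) and mask to the field width (4 bits).
    constexpr MsaaMode ExtractMsaaMode(std::uint32_t word) {
        return static_cast<MsaaMode>((word >> 8) & 0xFu);
    }

    static_assert(ExtractMsaaMode(0x200u) == MsaaMode::Msaa2x2, "bit 9 set -> mode 2");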
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index d34b47b3f..8b9404718 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt | |||
| @@ -150,6 +150,10 @@ target_link_libraries(yuzu PRIVATE common core input_common video_core) | |||
| 150 | target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) | 150 | target_link_libraries(yuzu PRIVATE Boost::boost glad Qt5::OpenGL Qt5::Widgets) |
| 151 | target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) | 151 | target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads) |
| 152 | 152 | ||
| 153 | if (ENABLE_VULKAN AND NOT WIN32) | ||
| 154 | target_include_directories(yuzu PRIVATE ${Qt5Gui_PRIVATE_INCLUDE_DIRS}) | ||
| 155 | endif() | ||
| 156 | |||
| 153 | target_compile_definitions(yuzu PRIVATE | 157 | target_compile_definitions(yuzu PRIVATE |
| 154 | # Use QStringBuilder for string concatenation to reduce | 158 | # Use QStringBuilder for string concatenation to reduce |
| 155 | # the overall number of temporary strings created. | 159 | # the overall number of temporary strings created. |
diff --git a/src/yuzu/about_dialog.cpp b/src/yuzu/about_dialog.cpp index d39b3f07a..695b2ef5f 100644 --- a/src/yuzu/about_dialog.cpp +++ b/src/yuzu/about_dialog.cpp | |||
| @@ -3,15 +3,22 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <QIcon> | 5 | #include <QIcon> |
| 6 | #include <fmt/format.h> | ||
| 6 | #include "common/scm_rev.h" | 7 | #include "common/scm_rev.h" |
| 7 | #include "ui_aboutdialog.h" | 8 | #include "ui_aboutdialog.h" |
| 8 | #include "yuzu/about_dialog.h" | 9 | #include "yuzu/about_dialog.h" |
| 9 | 10 | ||
| 10 | AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) { | 11 | AboutDialog::AboutDialog(QWidget* parent) : QDialog(parent), ui(new Ui::AboutDialog) { |
| 12 | const auto build_id = std::string(Common::g_build_id); | ||
| 13 | const auto fmt = std::string(Common::g_title_bar_format_idle); | ||
| 14 | const auto yuzu_build_version = | ||
| 15 | fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{}, | ||
| 16 | std::string{}, std::string{}, std::string{}, build_id); | ||
| 17 | |||
| 11 | ui->setupUi(this); | 18 | ui->setupUi(this); |
| 12 | ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200)); | 19 | ui->labelLogo->setPixmap(QIcon::fromTheme(QStringLiteral("yuzu")).pixmap(200)); |
| 13 | ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg( | 20 | ui->labelBuildInfo->setText(ui->labelBuildInfo->text().arg( |
| 14 | QString::fromUtf8(Common::g_build_fullname), QString::fromUtf8(Common::g_scm_branch), | 21 | QString::fromStdString(yuzu_build_version), QString::fromUtf8(Common::g_scm_branch), |
| 15 | QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10))); | 22 | QString::fromUtf8(Common::g_scm_desc), QString::fromUtf8(Common::g_build_date).left(10))); |
| 16 | } | 23 | } |
| 17 | 24 | ||
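[Editor's note] The same version-string construction reappears in main.cpp further down: the title bar format is a runtime string with up to six positional slots, and empty strings are passed for the slots that only apply while a game is running. fmt silently ignores arguments a format string never references, which is what makes the empty-placeholder trick safe. A small self-contained illustration with a hypothetical format string:

    #include <string>
    #include <fmt/format.h>

    int main() {
        // Hypothetical stand-in for Common::g_title_bar_format_idle.
        const std::string fmt_str = "yuzu {5}";
        const std::string build_id = "abc123";
        // Arguments 0-4 are unused by this format string; fmt ignores them.
        // (fmt 8+ would require wrapping fmt_str in fmt::runtime().)
        fmt::print("{}\n", fmt::format(fmt_str, std::string{}, std::string{},
                                       std::string{}, std::string{},
                                       std::string{}, build_id));
        return 0; // prints "yuzu abc123"
    }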
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index eaded2640..1cac2f942 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -14,8 +14,9 @@ | |||
| 14 | #include <QScreen> | 14 | #include <QScreen> |
| 15 | #include <QStringList> | 15 | #include <QStringList> |
| 16 | #include <QWindow> | 16 | #include <QWindow> |
| 17 | #ifdef HAS_VULKAN | 17 | |
| 18 | #include <QVulkanWindow> | 18 | #if !defined(WIN32) && HAS_VULKAN |
| 19 | #include <qpa/qplatformnativeinterface.h> | ||
| 19 | #endif | 20 | #endif |
| 20 | 21 | ||
| 21 | #include <fmt/format.h> | 22 | #include <fmt/format.h> |
| @@ -224,7 +225,6 @@ public: | |||
| 224 | } | 225 | } |
| 225 | 226 | ||
| 226 | context->MakeCurrent(); | 227 | context->MakeCurrent(); |
| 227 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); | ||
| 228 | if (Core::System::GetInstance().Renderer().TryPresent(100)) { | 228 | if (Core::System::GetInstance().Renderer().TryPresent(100)) { |
| 229 | context->SwapBuffers(); | 229 | context->SwapBuffers(); |
| 230 | glFinish(); | 230 | glFinish(); |
| @@ -238,16 +238,50 @@ private: | |||
| 238 | #ifdef HAS_VULKAN | 238 | #ifdef HAS_VULKAN |
| 239 | class VulkanRenderWidget : public RenderWidget { | 239 | class VulkanRenderWidget : public RenderWidget { |
| 240 | public: | 240 | public: |
| 241 | explicit VulkanRenderWidget(GRenderWindow* parent, QVulkanInstance* instance) | 241 | explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) { |
| 242 | : RenderWidget(parent) { | ||
| 243 | windowHandle()->setSurfaceType(QWindow::VulkanSurface); | 242 | windowHandle()->setSurfaceType(QWindow::VulkanSurface); |
| 244 | windowHandle()->setVulkanInstance(instance); | ||
| 245 | } | 243 | } |
| 246 | }; | 244 | }; |
| 247 | #endif | 245 | #endif |
| 248 | 246 | ||
| 249 | GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread) | 247 | static Core::Frontend::WindowSystemType GetWindowSystemType() { |
| 250 | : QWidget(parent_), emu_thread(emu_thread) { | 248 | // Determine WSI type based on Qt platform. |
| 249 | QString platform_name = QGuiApplication::platformName(); | ||
| 250 | if (platform_name == QStringLiteral("windows")) | ||
| 251 | return Core::Frontend::WindowSystemType::Windows; | ||
| 252 | else if (platform_name == QStringLiteral("xcb")) | ||
| 253 | return Core::Frontend::WindowSystemType::X11; | ||
| 254 | else if (platform_name == QStringLiteral("wayland")) | ||
| 255 | return Core::Frontend::WindowSystemType::Wayland; | ||
| 256 | |||
| 257 | LOG_CRITICAL(Frontend, "Unknown Qt platform!"); | ||
| 258 | return Core::Frontend::WindowSystemType::Windows; | ||
| 259 | } | ||
| 260 | |||
| 261 | static Core::Frontend::EmuWindow::WindowSystemInfo GetWindowSystemInfo(QWindow* window) { | ||
| 262 | Core::Frontend::EmuWindow::WindowSystemInfo wsi; | ||
| 263 | wsi.type = GetWindowSystemType(); | ||
| 264 | |||
| 265 | #ifdef HAS_VULKAN | ||
| 266 | // Our Win32 Qt external doesn't have the private API. | ||
| 267 | #if defined(WIN32) || defined(__APPLE__) | ||
| 268 | wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; | ||
| 269 | #else | ||
| 270 | QPlatformNativeInterface* pni = QGuiApplication::platformNativeInterface(); | ||
| 271 | wsi.display_connection = pni->nativeResourceForWindow("display", window); | ||
| 272 | if (wsi.type == Core::Frontend::WindowSystemType::Wayland) | ||
| 273 | wsi.render_surface = window ? pni->nativeResourceForWindow("surface", window) : nullptr; | ||
| 274 | else | ||
| 275 | wsi.render_surface = window ? reinterpret_cast<void*>(window->winId()) : nullptr; | ||
| 276 | #endif | ||
| 277 | wsi.render_surface_scale = window ? static_cast<float>(window->devicePixelRatio()) : 1.0f; | ||
| 278 | #endif | ||
| 279 | |||
| 280 | return wsi; | ||
| 281 | } | ||
| 282 | |||
| 283 | GRenderWindow::GRenderWindow(GMainWindow* parent_, EmuThread* emu_thread_) | ||
| 284 | : QWidget(parent_), emu_thread(emu_thread_) { | ||
| 251 | setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") | 285 | setWindowTitle(QStringLiteral("yuzu %1 | %2-%3") |
| 252 | .arg(QString::fromUtf8(Common::g_build_name), | 286 | .arg(QString::fromUtf8(Common::g_build_name), |
| 253 | QString::fromUtf8(Common::g_scm_branch), | 287 | QString::fromUtf8(Common::g_scm_branch), |
| @@ -460,6 +494,9 @@ bool GRenderWindow::InitRenderTarget() { | |||
| 460 | break; | 494 | break; |
| 461 | } | 495 | } |
| 462 | 496 | ||
| 497 | // Update the Window System information with the new render target | ||
| 498 | window_info = GetWindowSystemInfo(child_widget->windowHandle()); | ||
| 499 | |||
| 463 | child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); | 500 | child_widget->resize(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); |
| 464 | layout()->addWidget(child_widget); | 501 | layout()->addWidget(child_widget); |
| 465 | // Reset minimum required size to avoid resizing issues on the main window after restarting. | 502 | // Reset minimum required size to avoid resizing issues on the main window after restarting. |
| @@ -531,30 +568,7 @@ bool GRenderWindow::InitializeOpenGL() { | |||
| 531 | 568 | ||
| 532 | bool GRenderWindow::InitializeVulkan() { | 569 | bool GRenderWindow::InitializeVulkan() { |
| 533 | #ifdef HAS_VULKAN | 570 | #ifdef HAS_VULKAN |
| 534 | vk_instance = std::make_unique<QVulkanInstance>(); | 571 | auto child = new VulkanRenderWidget(this); |
| 535 | vk_instance->setApiVersion(QVersionNumber(1, 1, 0)); | ||
| 536 | vk_instance->setFlags(QVulkanInstance::Flag::NoDebugOutputRedirect); | ||
| 537 | if (Settings::values.renderer_debug) { | ||
| 538 | const auto supported_layers{vk_instance->supportedLayers()}; | ||
| 539 | const bool found = | ||
| 540 | std::find_if(supported_layers.begin(), supported_layers.end(), [](const auto& layer) { | ||
| 541 | constexpr const char searched_layer[] = "VK_LAYER_LUNARG_standard_validation"; | ||
| 542 | return layer.name == searched_layer; | ||
| 543 | }); | ||
| 544 | if (found) { | ||
| 545 | vk_instance->setLayers(QByteArrayList() << "VK_LAYER_LUNARG_standard_validation"); | ||
| 546 | vk_instance->setExtensions(QByteArrayList() << VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | ||
| 547 | } | ||
| 548 | } | ||
| 549 | if (!vk_instance->create()) { | ||
| 550 | QMessageBox::critical( | ||
| 551 | this, tr("Error while initializing Vulkan 1.1!"), | ||
| 552 | tr("Your OS doesn't seem to support Vulkan 1.1 instances, or you do not have the " | ||
| 553 | "latest graphics drivers.")); | ||
| 554 | return false; | ||
| 555 | } | ||
| 556 | |||
| 557 | auto child = new VulkanRenderWidget(this, vk_instance.get()); | ||
| 558 | child_widget = child; | 572 | child_widget = child; |
| 559 | child_widget->windowHandle()->create(); | 573 | child_widget->windowHandle()->create(); |
| 560 | main_context = std::make_unique<DummyContext>(); | 574 | main_context = std::make_unique<DummyContext>(); |
| @@ -567,21 +581,6 @@ bool GRenderWindow::InitializeVulkan() { | |||
| 567 | #endif | 581 | #endif |
| 568 | } | 582 | } |
| 569 | 583 | ||
| 570 | void GRenderWindow::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | ||
| 571 | void* surface) const { | ||
| 572 | #ifdef HAS_VULKAN | ||
| 573 | const auto instance_proc_addr = vk_instance->getInstanceProcAddr("vkGetInstanceProcAddr"); | ||
| 574 | const VkInstance instance_copy = vk_instance->vkInstance(); | ||
| 575 | const VkSurfaceKHR surface_copy = vk_instance->surfaceForWindow(child_widget->windowHandle()); | ||
| 576 | |||
| 577 | std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); | ||
| 578 | std::memcpy(instance, &instance_copy, sizeof(instance_copy)); | ||
| 579 | std::memcpy(surface, &surface_copy, sizeof(surface_copy)); | ||
| 580 | #else | ||
| 581 | UNREACHABLE_MSG("Executing Vulkan code without compiling Vulkan"); | ||
| 582 | #endif | ||
| 583 | } | ||
| 584 | |||
| 585 | bool GRenderWindow::LoadOpenGL() { | 584 | bool GRenderWindow::LoadOpenGL() { |
| 586 | auto context = CreateSharedContext(); | 585 | auto context = CreateSharedContext(); |
| 587 | auto scope = context->Acquire(); | 586 | auto scope = context->Acquire(); |
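[Editor's note] GetWindowSystemInfo is the heart of this refactor: instead of asking QVulkanInstance for a ready-made surface, the frontend now hands the backend raw native handles (HWND, X11 Display/Window, or wl_display/wl_surface) and lets the renderer create the VkSurfaceKHR itself. As a hedged sketch of what the consuming side might look like — yuzu's actual implementation lives in the Vulkan renderer and is not shown in this diff:

    // Sketch only: assumes the matching VK_USE_PLATFORM_*_KHR macro is
    // defined before <vulkan/vulkan.h> on each platform, and elides
    // error handling and instance creation.
    #include <vulkan/vulkan.h>

    #include "core/frontend/emu_window.h"

    VkSurfaceKHR CreateSurface(VkInstance instance,
                               const Core::Frontend::EmuWindow::WindowSystemInfo& wsi) {
        VkSurfaceKHR surface = VK_NULL_HANDLE;
    #ifdef VK_USE_PLATFORM_WIN32_KHR
        if (wsi.type == Core::Frontend::WindowSystemType::Windows) {
            VkWin32SurfaceCreateInfoKHR ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR};
            ci.hinstance = GetModuleHandleW(nullptr);
            ci.hwnd = static_cast<HWND>(wsi.render_surface); // from winId()
            vkCreateWin32SurfaceKHR(instance, &ci, nullptr, &surface);
        }
    #endif
    #ifdef VK_USE_PLATFORM_XLIB_KHR
        if (wsi.type == Core::Frontend::WindowSystemType::X11) {
            VkXlibSurfaceCreateInfoKHR ci{VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR};
            ci.dpy = static_cast<Display*>(wsi.display_connection);
            ci.window = reinterpret_cast<Window>(wsi.render_surface);
            vkCreateXlibSurfaceKHR(instance, &ci, nullptr, &surface);
        }
    #endif
    #ifdef VK_USE_PLATFORM_WAYLAND_KHR
        if (wsi.type == Core::Frontend::WindowSystemType::Wayland) {
            VkWaylandSurfaceCreateInfoKHR ci{VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR};
            ci.display = static_cast<wl_display*>(wsi.display_connection);
            ci.surface = static_cast<wl_surface*>(wsi.render_surface);
            vkCreateWaylandSurfaceKHR(instance, &ci, nullptr, &surface);
        }
    #endif
        return surface;
    }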
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h index d69078df1..3626604ca 100644 --- a/src/yuzu/bootmanager.h +++ b/src/yuzu/bootmanager.h | |||
| @@ -22,9 +22,6 @@ class GMainWindow; | |||
| 22 | class QKeyEvent; | 22 | class QKeyEvent; |
| 23 | class QTouchEvent; | 23 | class QTouchEvent; |
| 24 | class QStringList; | 24 | class QStringList; |
| 25 | #ifdef HAS_VULKAN | ||
| 26 | class QVulkanInstance; | ||
| 27 | #endif | ||
| 28 | 25 | ||
| 29 | namespace VideoCore { | 26 | namespace VideoCore { |
| 30 | enum class LoadCallbackStage; | 27 | enum class LoadCallbackStage; |
| @@ -122,8 +119,6 @@ public: | |||
| 122 | // EmuWindow implementation. | 119 | // EmuWindow implementation. |
| 123 | void PollEvents() override; | 120 | void PollEvents() override; |
| 124 | bool IsShown() const override; | 121 | bool IsShown() const override; |
| 125 | void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | ||
| 126 | void* surface) const override; | ||
| 127 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; | 122 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; |
| 128 | 123 | ||
| 129 | void BackupGeometry(); | 124 | void BackupGeometry(); |
| @@ -186,10 +181,6 @@ private: | |||
| 186 | // should instead be shared from | 181 | // should instead be shared from |
| 187 | std::shared_ptr<Core::Frontend::GraphicsContext> main_context; | 182 | std::shared_ptr<Core::Frontend::GraphicsContext> main_context; |
| 188 | 183 | ||
| 189 | #ifdef HAS_VULKAN | ||
| 190 | std::unique_ptr<QVulkanInstance> vk_instance; | ||
| 191 | #endif | ||
| 192 | |||
| 193 | /// Temporary storage of the screenshot taken | 184 | /// Temporary storage of the screenshot taken |
| 194 | QImage screenshot_image; | 185 | QImage screenshot_image; |
| 195 | 186 | ||
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index a821c7b3c..ea667caef 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -15,6 +15,10 @@ | |||
| 15 | #include "ui_configure_graphics.h" | 15 | #include "ui_configure_graphics.h" |
| 16 | #include "yuzu/configuration/configure_graphics.h" | 16 | #include "yuzu/configuration/configure_graphics.h" |
| 17 | 17 | ||
| 18 | #ifdef HAS_VULKAN | ||
| 19 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||
| 20 | #endif | ||
| 21 | |||
| 18 | namespace { | 22 | namespace { |
| 19 | enum class Resolution : int { | 23 | enum class Resolution : int { |
| 20 | Auto, | 24 | Auto, |
| @@ -165,41 +169,9 @@ void ConfigureGraphics::UpdateDeviceComboBox() { | |||
| 165 | 169 | ||
| 166 | void ConfigureGraphics::RetrieveVulkanDevices() { | 170 | void ConfigureGraphics::RetrieveVulkanDevices() { |
| 167 | #ifdef HAS_VULKAN | 171 | #ifdef HAS_VULKAN |
| 168 | QVulkanInstance instance; | 172 | vulkan_devices.clear(); |
| 169 | instance.setApiVersion(QVersionNumber(1, 1, 0)); | 173 | for (auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { |
| 170 | if (!instance.create()) { | 174 | vulkan_devices.push_back(QString::fromStdString(name)); |
| 171 | LOG_INFO(Frontend, "Vulkan 1.1 not available"); | ||
| 172 | return; | ||
| 173 | } | ||
| 174 | const auto vkEnumeratePhysicalDevices{reinterpret_cast<PFN_vkEnumeratePhysicalDevices>( | ||
| 175 | instance.getInstanceProcAddr("vkEnumeratePhysicalDevices"))}; | ||
| 176 | if (vkEnumeratePhysicalDevices == nullptr) { | ||
| 177 | LOG_INFO(Frontend, "Failed to get pointer to vkEnumeratePhysicalDevices"); | ||
| 178 | return; | ||
| 179 | } | ||
| 180 | u32 physical_device_count; | ||
| 181 | if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, nullptr) != | ||
| 182 | VK_SUCCESS) { | ||
| 183 | LOG_INFO(Frontend, "Failed to get physical devices count"); | ||
| 184 | return; | ||
| 185 | } | ||
| 186 | std::vector<VkPhysicalDevice> physical_devices(physical_device_count); | ||
| 187 | if (vkEnumeratePhysicalDevices(instance.vkInstance(), &physical_device_count, | ||
| 188 | physical_devices.data()) != VK_SUCCESS) { | ||
| 189 | LOG_INFO(Frontend, "Failed to get physical devices"); | ||
| 190 | return; | ||
| 191 | } | ||
| 192 | |||
| 193 | const auto vkGetPhysicalDeviceProperties{reinterpret_cast<PFN_vkGetPhysicalDeviceProperties>( | ||
| 194 | instance.getInstanceProcAddr("vkGetPhysicalDeviceProperties"))}; | ||
| 195 | if (vkGetPhysicalDeviceProperties == nullptr) { | ||
| 196 | LOG_INFO(Frontend, "Failed to get pointer to vkGetPhysicalDeviceProperties"); | ||
| 197 | return; | ||
| 198 | } | ||
| 199 | for (const auto physical_device : physical_devices) { | ||
| 200 | VkPhysicalDeviceProperties properties; | ||
| 201 | vkGetPhysicalDeviceProperties(physical_device, &properties); | ||
| 202 | vulkan_devices.push_back(QString::fromUtf8(properties.deviceName)); | ||
| 203 | } | 175 | } |
| 204 | #endif | 176 | #endif |
| 205 | } | 177 | } |
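[Editor's note] RetrieveVulkanDevices shrinks to a loop over Vulkan::RendererVulkan::EnumerateDevices(), moving the instance plumbing into the backend where it can be shared with the renderer proper. The deleted code shows the sequence the helper presumably still performs; as a hedged sketch of that shape (assuming an already-created instance, which the real helper manages itself):

    #include <cstdint>
    #include <string>
    #include <vector>

    #include <vulkan/vulkan.h>

    std::vector<std::string> EnumerateDeviceNames(VkInstance instance) {
        std::uint32_t count = 0;
        if (vkEnumeratePhysicalDevices(instance, &count, nullptr) != VK_SUCCESS) {
            return {};
        }
        std::vector<VkPhysicalDevice> devices(count);
        if (vkEnumeratePhysicalDevices(instance, &count, devices.data()) != VK_SUCCESS) {
            return {};
        }
        std::vector<std::string> names;
        names.reserve(devices.size());
        for (const VkPhysicalDevice device : devices) {
            VkPhysicalDeviceProperties properties;
            vkGetPhysicalDeviceProperties(device, &properties);
            names.emplace_back(properties.deviceName);
        }
        return names;
    }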
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index 96dec50e2..15ac30f12 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp | |||
| @@ -541,18 +541,19 @@ void ConfigureInputPlayer::HandleClick( | |||
| 541 | button->setText(tr("[press key]")); | 541 | button->setText(tr("[press key]")); |
| 542 | button->setFocus(); | 542 | button->setFocus(); |
| 543 | 543 | ||
| 544 | const auto iter = std::find(button_map.begin(), button_map.end(), button); | 544 | // Keyboard keys can only be used as button devices |
| 545 | ASSERT(iter != button_map.end()); | 545 | want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; |
| 546 | const auto index = std::distance(button_map.begin(), iter); | 546 | if (want_keyboard_keys) { |
| 547 | ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); | 547 | const auto iter = std::find(button_map.begin(), button_map.end(), button); |
| 548 | ASSERT(iter != button_map.end()); | ||
| 549 | const auto index = std::distance(button_map.begin(), iter); | ||
| 550 | ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); | ||
| 551 | } | ||
| 548 | 552 | ||
| 549 | input_setter = new_input_setter; | 553 | input_setter = new_input_setter; |
| 550 | 554 | ||
| 551 | device_pollers = InputCommon::Polling::GetPollers(type); | 555 | device_pollers = InputCommon::Polling::GetPollers(type); |
| 552 | 556 | ||
| 553 | // Keyboard keys can only be used as button devices | ||
| 554 | want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; | ||
| 555 | |||
| 556 | for (auto& poller : device_pollers) { | 557 | for (auto& poller : device_pollers) { |
| 557 | poller->Start(); | 558 | poller->Start(); |
| 558 | } | 559 | } |
diff --git a/src/yuzu/configuration/configure_input_player.ui b/src/yuzu/configuration/configure_input_player.ui index c3a1b68f0..4b37746a1 100644 --- a/src/yuzu/configuration/configure_input_player.ui +++ b/src/yuzu/configuration/configure_input_player.ui | |||
| @@ -927,7 +927,7 @@ | |||
| 927 | </item> | 927 | </item> |
| 928 | </layout> | 928 | </layout> |
| 929 | </item> | 929 | </item> |
| 930 | <item row="2" column="0"> | 930 | <item row="0" column="2"> |
| 931 | <layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout"> | 931 | <layout class="QVBoxLayout" name="buttonShoulderButtonsSLVerticalLayout"> |
| 932 | <item> | 932 | <item> |
| 933 | <layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout"> | 933 | <layout class="QHBoxLayout" name="buttonShoulderButtonsSLHorizontalLayout"> |
| @@ -949,7 +949,7 @@ | |||
| 949 | </item> | 949 | </item> |
| 950 | </layout> | 950 | </layout> |
| 951 | </item> | 951 | </item> |
| 952 | <item row="2" column="1"> | 952 | <item row="1" column="2"> |
| 953 | <layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout"> | 953 | <layout class="QVBoxLayout" name="buttonShoulderButtonsSRVerticalLayout"> |
| 954 | <item> | 954 | <item> |
| 955 | <layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout"> | 955 | <layout class="QHBoxLayout" name="buttonShoulderButtonsSRHorizontalLayout"> |
diff --git a/src/yuzu/configuration/configure_input_simple.cpp b/src/yuzu/configuration/configure_input_simple.cpp index ab3a11d30..0e0e8f113 100644 --- a/src/yuzu/configuration/configure_input_simple.cpp +++ b/src/yuzu/configuration/configure_input_simple.cpp | |||
| @@ -35,6 +35,7 @@ void CallConfigureDialog(ConfigureInputSimple* caller, Args&&... args) { | |||
| 35 | // - Open any dialogs | 35 | // - Open any dialogs |
| 36 | // - Block in any way | 36 | // - Block in any way |
| 37 | 37 | ||
| 38 | constexpr std::size_t PLAYER_0_INDEX = 0; | ||
| 38 | constexpr std::size_t HANDHELD_INDEX = 8; | 39 | constexpr std::size_t HANDHELD_INDEX = 8; |
| 39 | 40 | ||
| 40 | void HandheldOnProfileSelect() { | 41 | void HandheldOnProfileSelect() { |
| @@ -53,8 +54,8 @@ void HandheldOnProfileSelect() { | |||
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | void DualJoyconsDockedOnProfileSelect() { | 56 | void DualJoyconsDockedOnProfileSelect() { |
| 56 | Settings::values.players[0].connected = true; | 57 | Settings::values.players[PLAYER_0_INDEX].connected = true; |
| 57 | Settings::values.players[0].type = Settings::ControllerType::DualJoycon; | 58 | Settings::values.players[PLAYER_0_INDEX].type = Settings::ControllerType::DualJoycon; |
| 58 | 59 | ||
| 59 | for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) { | 60 | for (std::size_t player = 1; player <= HANDHELD_INDEX; ++player) { |
| 60 | Settings::values.players[player].connected = false; | 61 | Settings::values.players[player].connected = false; |
| @@ -64,7 +65,7 @@ void DualJoyconsDockedOnProfileSelect() { | |||
| 64 | Settings::values.keyboard_enabled = false; | 65 | Settings::values.keyboard_enabled = false; |
| 65 | Settings::values.mouse_enabled = false; | 66 | Settings::values.mouse_enabled = false; |
| 66 | Settings::values.debug_pad_enabled = false; | 67 | Settings::values.debug_pad_enabled = false; |
| 67 | Settings::values.touchscreen.enabled = false; | 68 | Settings::values.touchscreen.enabled = true; |
| 68 | } | 69 | } |
| 69 | 70 | ||
| 70 | // Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure | 71 | // Name, OnProfileSelect (called when selected in drop down), OnConfigure (called when configure |
| @@ -78,7 +79,7 @@ constexpr std::array<InputProfile, 3> INPUT_PROFILES{{ | |||
| 78 | }}, | 79 | }}, |
| 79 | {QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect, | 80 | {QT_TR_NOOP("Single Player - Dual Joycons - Docked"), DualJoyconsDockedOnProfileSelect, |
| 80 | [](ConfigureInputSimple* caller) { | 81 | [](ConfigureInputSimple* caller) { |
| 81 | CallConfigureDialog<ConfigureInputPlayer>(caller, 1, false); | 82 | CallConfigureDialog<ConfigureInputPlayer>(caller, PLAYER_0_INDEX, false); |
| 82 | }}, | 83 | }}, |
| 83 | {QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>}, | 84 | {QT_TR_NOOP("Custom"), [] {}, CallConfigureDialog<ConfigureInput>}, |
| 84 | }}; | 85 | }}; |
diff --git a/src/yuzu/configuration/configure_mouse_advanced.cpp b/src/yuzu/configuration/configure_mouse_advanced.cpp index 0a4abe34f..e0647ea5b 100644 --- a/src/yuzu/configuration/configure_mouse_advanced.cpp +++ b/src/yuzu/configuration/configure_mouse_advanced.cpp | |||
| @@ -184,18 +184,19 @@ void ConfigureMouseAdvanced::HandleClick( | |||
| 184 | button->setText(tr("[press key]")); | 184 | button->setText(tr("[press key]")); |
| 185 | button->setFocus(); | 185 | button->setFocus(); |
| 186 | 186 | ||
| 187 | const auto iter = std::find(button_map.begin(), button_map.end(), button); | 187 | // Keyboard keys can only be used as button devices |
| 188 | ASSERT(iter != button_map.end()); | 188 | want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; |
| 189 | const auto index = std::distance(button_map.begin(), iter); | 189 | if (want_keyboard_keys) { |
| 190 | ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); | 190 | const auto iter = std::find(button_map.begin(), button_map.end(), button); |
| 191 | ASSERT(iter != button_map.end()); | ||
| 192 | const auto index = std::distance(button_map.begin(), iter); | ||
| 193 | ASSERT(index < Settings::NativeButton::NumButtons && index >= 0); | ||
| 194 | } | ||
| 191 | 195 | ||
| 192 | input_setter = new_input_setter; | 196 | input_setter = new_input_setter; |
| 193 | 197 | ||
| 194 | device_pollers = InputCommon::Polling::GetPollers(type); | 198 | device_pollers = InputCommon::Polling::GetPollers(type); |
| 195 | 199 | ||
| 196 | // Keyboard keys can only be used as button devices | ||
| 197 | want_keyboard_keys = type == InputCommon::Polling::DeviceType::Button; | ||
| 198 | |||
| 199 | for (auto& poller : device_pollers) { | 200 | for (auto& poller : device_pollers) { |
| 200 | poller->Start(); | 201 | poller->Start(); |
| 201 | } | 202 | } |
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index a2b88c787..dccbabcbf 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp | |||
| @@ -315,7 +315,7 @@ GameList::GameList(FileSys::VirtualFilesystem vfs, FileSys::ManualContentProvide | |||
| 315 | item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); | 315 | item_model->setHeaderData(COLUMN_FILE_TYPE - 1, Qt::Horizontal, tr("File type")); |
| 316 | item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); | 316 | item_model->setHeaderData(COLUMN_SIZE - 1, Qt::Horizontal, tr("Size")); |
| 317 | } | 317 | } |
| 318 | item_model->setSortRole(GameListItemPath::TitleRole); | 318 | item_model->setSortRole(GameListItemPath::SortRole); |
| 319 | 319 | ||
| 320 | connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons); | 320 | connect(main_window, &GMainWindow::UpdateThemedIcons, this, &GameList::onUpdateThemedIcons); |
| 321 | connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); | 321 | connect(tree_view, &QTreeView::activated, this, &GameList::ValidateEntry); |
| @@ -441,6 +441,8 @@ void GameList::DonePopulating(QStringList watch_list) { | |||
| 441 | if (children_total > 0) { | 441 | if (children_total > 0) { |
| 442 | search_field->setFocus(); | 442 | search_field->setFocus(); |
| 443 | } | 443 | } |
| 444 | item_model->sort(tree_view->header()->sortIndicatorSection(), | ||
| 445 | tree_view->header()->sortIndicatorOrder()); | ||
| 444 | } | 446 | } |
| 445 | 447 | ||
| 446 | void GameList::PopupContextMenu(const QPoint& menu_location) { | 448 | void GameList::PopupContextMenu(const QPoint& menu_location) { |
| @@ -666,8 +668,6 @@ void GameList::LoadInterfaceLayout() { | |||
| 666 | // so make it as large as possible as default. | 668 | // so make it as large as possible as default. |
| 667 | header->resizeSection(COLUMN_NAME, header->width()); | 669 | header->resizeSection(COLUMN_NAME, header->width()); |
| 668 | } | 670 | } |
| 669 | |||
| 670 | item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); | ||
| 671 | } | 671 | } |
| 672 | 672 | ||
| 673 | const QStringList GameList::supported_file_extensions = { | 673 | const QStringList GameList::supported_file_extensions = { |
diff --git a/src/yuzu/game_list_p.h b/src/yuzu/game_list_p.h index 7cde72d1b..3e6d5a7cd 100644 --- a/src/yuzu/game_list_p.h +++ b/src/yuzu/game_list_p.h | |||
| @@ -65,10 +65,10 @@ public: | |||
| 65 | */ | 65 | */ |
| 66 | class GameListItemPath : public GameListItem { | 66 | class GameListItemPath : public GameListItem { |
| 67 | public: | 67 | public: |
| 68 | static const int TitleRole = SortRole; | 68 | static const int TitleRole = SortRole + 1; |
| 69 | static const int FullPathRole = SortRole + 1; | 69 | static const int FullPathRole = SortRole + 2; |
| 70 | static const int ProgramIdRole = SortRole + 2; | 70 | static const int ProgramIdRole = SortRole + 3; |
| 71 | static const int FileTypeRole = SortRole + 3; | 71 | static const int FileTypeRole = SortRole + 4; |
| 72 | 72 | ||
| 73 | GameListItemPath() = default; | 73 | GameListItemPath() = default; |
| 74 | GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, | 74 | GameListItemPath(const QString& game_path, const std::vector<u8>& picture_data, |
| @@ -95,7 +95,7 @@ public: | |||
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | QVariant data(int role) const override { | 97 | QVariant data(int role) const override { |
| 98 | if (role == Qt::DisplayRole) { | 98 | if (role == Qt::DisplayRole || role == SortRole) { |
| 99 | std::string filename; | 99 | std::string filename; |
| 100 | Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, | 100 | Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, |
| 101 | nullptr); | 101 | nullptr); |
| @@ -110,6 +110,9 @@ public: | |||
| 110 | const auto& row1 = row_data.at(UISettings::values.row_1_text_id); | 110 | const auto& row1 = row_data.at(UISettings::values.row_1_text_id); |
| 111 | const int row2_id = UISettings::values.row_2_text_id; | 111 | const int row2_id = UISettings::values.row_2_text_id; |
| 112 | 112 | ||
| 113 | if (role == SortRole) | ||
| 114 | return row1.toLower(); | ||
| 115 | |||
| 113 | if (row2_id == 4) // None | 116 | if (row2_id == 4) // None |
| 114 | return row1; | 117 | return row1; |
| 115 | 118 | ||
| @@ -123,6 +126,13 @@ public: | |||
| 123 | 126 | ||
| 124 | return GameListItem::data(role); | 127 | return GameListItem::data(role); |
| 125 | } | 128 | } |
| 129 | |||
| 130 | /** | ||
| 131 | * Override to prevent automatic sorting. | ||
| 132 | */ | ||
| 133 | bool operator<(const QStandardItem& other) const override { | ||
| 134 | return false; | ||
| 135 | } | ||
| 126 | }; | 136 | }; |
| 127 | 137 | ||
| 128 | class GameListItemCompat : public GameListItem { | 138 | class GameListItemCompat : public GameListItem { |
| @@ -289,6 +299,10 @@ public: | |||
| 289 | int type() const override { | 299 | int type() const override { |
| 290 | return static_cast<int>(GameListItemType::AddDir); | 300 | return static_cast<int>(GameListItemType::AddDir); |
| 291 | } | 301 | } |
| 302 | |||
| 303 | bool operator<(const QStandardItem& other) const override { | ||
| 304 | return false; | ||
| 305 | } | ||
| 292 | }; | 306 | }; |
| 293 | 307 | ||
| 294 | class GameList; | 308 | class GameList; |
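[Editor's note] The game list changes work together: setSortRole points the model at a dedicated SortRole whose data is the lowercased title, the operator< overrides return false so that (per the in-code comment) automatic item comparison never reorders rows while the list is being populated, and DonePopulating applies the header's current sort indicator once population is complete. A minimal sketch of the role-based part in isolation:

    #include <QStandardItemModel>

    constexpr int SortRole = Qt::UserRole + 1;

    // Sorting is driven by the data stored under SortRole (a lowercased
    // key), while DisplayRole keeps the visible, original-case text.
    QStandardItemModel* MakeModel() {
        auto* model = new QStandardItemModel;
        model->setSortRole(SortRole);
        for (const QString& title : {QStringLiteral("zelda"), QStringLiteral("Mario")}) {
            auto* item = new QStandardItem(title);
            item->setData(title.toLower(), SortRole); // case-insensitive key
            model->appendRow(item);
        }
        model->sort(0, Qt::AscendingOrder); // "Mario" sorts before "zelda"
        return model;
    }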
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 940f24dc8..1717e06f9 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -205,7 +205,13 @@ GMainWindow::GMainWindow() | |||
| 205 | ConnectMenuEvents(); | 205 | ConnectMenuEvents(); |
| 206 | ConnectWidgetEvents(); | 206 | ConnectWidgetEvents(); |
| 207 | 207 | ||
| 208 | LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch, | 208 | const auto build_id = std::string(Common::g_build_id); |
| 209 | const auto fmt = std::string(Common::g_title_bar_format_idle); | ||
| 210 | const auto yuzu_build_version = | ||
| 211 | fmt::format(fmt.empty() ? "yuzu Development Build" : fmt, std::string{}, std::string{}, | ||
| 212 | std::string{}, std::string{}, std::string{}, build_id); | ||
| 213 | |||
| 214 | LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", yuzu_build_version, Common::g_scm_branch, | ||
| 209 | Common::g_scm_desc); | 215 | Common::g_scm_desc); |
| 210 | #ifdef ARCHITECTURE_x86_64 | 216 | #ifdef ARCHITECTURE_x86_64 |
| 211 | LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); | 217 | LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 3522dcf6d..411e7e647 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | |||
| @@ -156,12 +156,6 @@ EmuWindow_SDL2_GL::~EmuWindow_SDL2_GL() { | |||
| 156 | SDL_GL_DeleteContext(window_context); | 156 | SDL_GL_DeleteContext(window_context); |
| 157 | } | 157 | } |
| 158 | 158 | ||
| 159 | void EmuWindow_SDL2_GL::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | ||
| 160 | void* surface) const { | ||
| 161 | // Should not have been called from OpenGL | ||
| 162 | UNREACHABLE(); | ||
| 163 | } | ||
| 164 | |||
| 165 | std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { | 159 | std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_GL::CreateSharedContext() const { |
| 166 | return std::make_unique<SDLGLContext>(); | 160 | return std::make_unique<SDLGLContext>(); |
| 167 | } | 161 | } |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h index e092021d7..48bb41683 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.h | |||
| @@ -15,10 +15,6 @@ public: | |||
| 15 | 15 | ||
| 16 | void Present() override; | 16 | void Present() override; |
| 17 | 17 | ||
| 18 | /// Ignored in OpenGL | ||
| 19 | void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | ||
| 20 | void* surface) const override; | ||
| 21 | |||
| 22 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; | 18 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; |
| 23 | 19 | ||
| 24 | private: | 20 | private: |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp index 46d053f04..f2990910e 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.cpp | |||
| @@ -2,102 +2,62 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <cstdlib> |
| 6 | #include <memory> | ||
| 6 | #include <string> | 7 | #include <string> |
| 7 | #include <vector> | 8 | |
| 8 | #include <SDL.h> | ||
| 9 | #include <SDL_vulkan.h> | ||
| 10 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| 11 | #include <vulkan/vulkan.h> | 10 | |
| 12 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 13 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 14 | #include "common/scm_rev.h" | 13 | #include "common/scm_rev.h" |
| 15 | #include "core/settings.h" | 14 | #include "core/settings.h" |
| 15 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||
| 16 | #include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" | 16 | #include "yuzu_cmd/emu_window/emu_window_sdl2_vk.h" |
| 17 | 17 | ||
| 18 | // Include these late to avoid polluting everything with Xlib macros | ||
| 19 | #include <SDL.h> | ||
| 20 | #include <SDL_syswm.h> | ||
| 21 | |||
| 18 | EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) | 22 | EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) |
| 19 | : EmuWindow_SDL2{system, fullscreen} { | 23 | : EmuWindow_SDL2{system, fullscreen} { |
| 20 | if (SDL_Vulkan_LoadLibrary(nullptr) != 0) { | ||
| 21 | LOG_CRITICAL(Frontend, "SDL failed to load the Vulkan library: {}", SDL_GetError()); | ||
| 22 | exit(EXIT_FAILURE); | ||
| 23 | } | ||
| 24 | |||
| 25 | vkGetInstanceProcAddr = | ||
| 26 | reinterpret_cast<PFN_vkGetInstanceProcAddr>(SDL_Vulkan_GetVkGetInstanceProcAddr()); | ||
| 27 | if (vkGetInstanceProcAddr == nullptr) { | ||
| 28 | LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); | ||
| 29 | exit(EXIT_FAILURE); | ||
| 30 | } | ||
| 31 | |||
| 32 | const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name, | 24 | const std::string window_title = fmt::format("yuzu {} | {}-{} (Vulkan)", Common::g_build_name, |
| 33 | Common::g_scm_branch, Common::g_scm_desc); | 25 | Common::g_scm_branch, Common::g_scm_desc); |
| 34 | render_window = | 26 | render_window = |
| 35 | SDL_CreateWindow(window_title.c_str(), | 27 | SDL_CreateWindow(window_title.c_str(), SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, |
| 36 | SDL_WINDOWPOS_UNDEFINED, // x position | ||
| 37 | SDL_WINDOWPOS_UNDEFINED, // y position | ||
| 38 | Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, | 28 | Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height, |
| 39 | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI | SDL_WINDOW_VULKAN); | 29 | SDL_WINDOW_RESIZABLE | SDL_WINDOW_ALLOW_HIGHDPI); |
| 40 | |||
| 41 | const bool use_standard_layers = UseStandardLayers(vkGetInstanceProcAddr); | ||
| 42 | |||
| 43 | u32 extra_ext_count{}; | ||
| 44 | if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, NULL)) { | ||
| 45 | LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions count from SDL! {}", | ||
| 46 | SDL_GetError()); | ||
| 47 | exit(1); | ||
| 48 | } | ||
| 49 | |||
| 50 | auto extra_ext_names = std::make_unique<const char* []>(extra_ext_count); | ||
| 51 | if (!SDL_Vulkan_GetInstanceExtensions(render_window, &extra_ext_count, extra_ext_names.get())) { | ||
| 52 | LOG_CRITICAL(Frontend, "Failed to query Vulkan extensions from SDL! {}", SDL_GetError()); | ||
| 53 | exit(1); | ||
| 54 | } | ||
| 55 | std::vector<const char*> enabled_extensions; | ||
| 56 | enabled_extensions.insert(enabled_extensions.begin(), extra_ext_names.get(), | ||
| 57 | extra_ext_names.get() + extra_ext_count); | ||
| 58 | |||
| 59 | std::vector<const char*> enabled_layers; | ||
| 60 | if (use_standard_layers) { | ||
| 61 | enabled_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | ||
| 62 | enabled_layers.push_back("VK_LAYER_LUNARG_standard_validation"); | ||
| 63 | } | ||
| 64 | |||
| 65 | VkApplicationInfo app_info{}; | ||
| 66 | app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; | ||
| 67 | app_info.apiVersion = VK_API_VERSION_1_1; | ||
| 68 | app_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); | ||
| 69 | app_info.pApplicationName = "yuzu-emu"; | ||
| 70 | app_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); | ||
| 71 | app_info.pEngineName = "yuzu-emu"; | ||
| 72 | 30 | ||
| 73 | VkInstanceCreateInfo instance_ci{}; | 31 | SDL_SysWMinfo wm; |
| 74 | instance_ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; | 32 | if (SDL_GetWindowWMInfo(render_window, &wm) == SDL_FALSE) { |
| 75 | instance_ci.pApplicationInfo = &app_info; | 33 | LOG_CRITICAL(Frontend, "Failed to get information from the window manager"); |
| 76 | instance_ci.enabledExtensionCount = static_cast<u32>(enabled_extensions.size()); | 34 | std::exit(EXIT_FAILURE); |
| 77 | instance_ci.ppEnabledExtensionNames = enabled_extensions.data(); | ||
| 78 | if (Settings::values.renderer_debug) { | ||
| 79 | instance_ci.enabledLayerCount = static_cast<u32>(enabled_layers.size()); | ||
| 80 | instance_ci.ppEnabledLayerNames = enabled_layers.data(); | ||
| 81 | } | 35 | } |
| 82 | 36 | ||
| 83 | const auto vkCreateInstance = | 37 | switch (wm.subsystem) { |
| 84 | reinterpret_cast<PFN_vkCreateInstance>(vkGetInstanceProcAddr(nullptr, "vkCreateInstance")); | 38 | #ifdef SDL_VIDEO_DRIVER_WINDOWS |
| 85 | if (vkCreateInstance == nullptr || | 39 | case SDL_SYSWM_TYPE::SDL_SYSWM_WINDOWS: |
| 86 | vkCreateInstance(&instance_ci, nullptr, &vk_instance) != VK_SUCCESS) { | 40 | window_info.type = Core::Frontend::WindowSystemType::Windows; |
| 87 | LOG_CRITICAL(Frontend, "Failed to create Vulkan instance!"); | 41 | window_info.render_surface = reinterpret_cast<void*>(wm.info.win.window); |
| 88 | exit(EXIT_FAILURE); | 42 | break; |
| 89 | } | 43 | #endif |
| 90 | 44 | #ifdef SDL_VIDEO_DRIVER_X11 | |
| 91 | vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>( | 45 | case SDL_SYSWM_TYPE::SDL_SYSWM_X11: |
| 92 | vkGetInstanceProcAddr(vk_instance, "vkDestroyInstance")); | 46 | window_info.type = Core::Frontend::WindowSystemType::X11; |
| 93 | if (vkDestroyInstance == nullptr) { | 47 | window_info.display_connection = wm.info.x11.display; |
| 94 | LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); | 48 | window_info.render_surface = reinterpret_cast<void*>(wm.info.x11.window); |
| 95 | exit(EXIT_FAILURE); | 49 | break; |
| 96 | } | 50 | #endif |
| 97 | 51 | #ifdef SDL_VIDEO_DRIVER_WAYLAND | |
| 98 | if (!SDL_Vulkan_CreateSurface(render_window, vk_instance, &vk_surface)) { | 52 | case SDL_SYSWM_TYPE::SDL_SYSWM_WAYLAND: |
| 99 | LOG_CRITICAL(Frontend, "Failed to create Vulkan surface! {}", SDL_GetError()); | 53 | window_info.type = Core::Frontend::WindowSystemType::Wayland; |
| 100 | exit(EXIT_FAILURE); | 54 | window_info.display_connection = wm.info.wl.display; |
| 55 | window_info.render_surface = wm.info.wl.surface; | ||
| 56 | break; | ||
| 57 | #endif | ||
| 58 | default: | ||
| 59 | LOG_CRITICAL(Frontend, "Window manager subsystem not implemented"); | ||
| 60 | std::exit(EXIT_FAILURE); | ||
| 101 | } | 61 | } |
| 102 | 62 | ||
| 103 | OnResize(); | 63 | OnResize(); |
| @@ -107,51 +67,12 @@ EmuWindow_SDL2_VK::EmuWindow_SDL2_VK(Core::System& system, bool fullscreen) | |||
| 107 | Common::g_scm_branch, Common::g_scm_desc); | 67 | Common::g_scm_branch, Common::g_scm_desc); |
| 108 | } | 68 | } |
| 109 | 69 | ||
| 110 | EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() { | 70 | EmuWindow_SDL2_VK::~EmuWindow_SDL2_VK() = default; |
| 111 | vkDestroyInstance(vk_instance, nullptr); | ||
| 112 | } | ||
| 113 | |||
| 114 | void EmuWindow_SDL2_VK::RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | ||
| 115 | void* surface) const { | ||
| 116 | const auto instance_proc_addr = vkGetInstanceProcAddr; | ||
| 117 | std::memcpy(get_instance_proc_addr, &instance_proc_addr, sizeof(instance_proc_addr)); | ||
| 118 | std::memcpy(instance, &vk_instance, sizeof(vk_instance)); | ||
| 119 | std::memcpy(surface, &vk_surface, sizeof(vk_surface)); | ||
| 120 | } | ||
| 121 | 71 | ||
| 122 | std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { | 72 | std::unique_ptr<Core::Frontend::GraphicsContext> EmuWindow_SDL2_VK::CreateSharedContext() const { |
| 123 | return nullptr; | 73 | return nullptr; |
| 124 | } | 74 | } |
| 125 | 75 | ||
| 126 | bool EmuWindow_SDL2_VK::UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const { | ||
| 127 | if (!Settings::values.renderer_debug) { | ||
| 128 | return false; | ||
| 129 | } | ||
| 130 | |||
| 131 | const auto vkEnumerateInstanceLayerProperties = | ||
| 132 | reinterpret_cast<PFN_vkEnumerateInstanceLayerProperties>( | ||
| 133 | vkGetInstanceProcAddr(nullptr, "vkEnumerateInstanceLayerProperties")); | ||
| 134 | if (vkEnumerateInstanceLayerProperties == nullptr) { | ||
| 135 | LOG_CRITICAL(Frontend, "Failed to retrieve Vulkan function pointer!"); | ||
| 136 | return false; | ||
| 137 | } | ||
| 138 | |||
| 139 | u32 available_layers_count{}; | ||
| 140 | if (vkEnumerateInstanceLayerProperties(&available_layers_count, nullptr) != VK_SUCCESS) { | ||
| 141 | LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!"); | ||
| 142 | return false; | ||
| 143 | } | ||
| 144 | std::vector<VkLayerProperties> layers(available_layers_count); | ||
| 145 | if (vkEnumerateInstanceLayerProperties(&available_layers_count, layers.data()) != VK_SUCCESS) { | ||
| 146 | LOG_CRITICAL(Frontend, "Failed to enumerate Vulkan validation layers!"); | ||
| 147 | return false; | ||
| 148 | } | ||
| 149 | |||
| 150 | return std::find_if(layers.begin(), layers.end(), [&](const auto& layer) { | ||
| 151 | return layer.layerName == std::string("VK_LAYER_LUNARG_standard_validation"); | ||
| 152 | }) != layers.end(); | ||
| 153 | } | ||
| 154 | |||
| 155 | void EmuWindow_SDL2_VK::Present() { | 76 | void EmuWindow_SDL2_VK::Present() { |
| 156 | // TODO (bunnei): ImplementMe | 77 | // TODO (bunnei): ImplementMe |
| 157 | } | 78 | } |
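[Editor's note] One caveat worth flagging in the new SDL path: SDL2's SDL_GetWindowWMInfo requires the caller to stamp the struct with the compile-time SDL version before the call and returns SDL_FALSE otherwise, and the hunk above does not show an SDL_VERSION(&wm.version) line. Whether that line exists in elided context or was added in a follow-up, the canonical pattern looks like this:

    #include <SDL.h>
    #include <SDL_syswm.h>

    // SDL2 refuses to fill the struct unless wm->version is initialized
    // first, so SDL_VERSION must run before SDL_GetWindowWMInfo.
    bool QueryWMInfo(SDL_Window* window, SDL_SysWMinfo* wm) {
        SDL_VERSION(&wm->version);
        return SDL_GetWindowWMInfo(window, wm) == SDL_TRUE;
    }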
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h index 3dd1f3f61..b8021ebea 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_vk.h | |||
| @@ -4,27 +4,21 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <vulkan/vulkan.h> | 7 | #include <memory> |
| 8 | |||
| 8 | #include "core/frontend/emu_window.h" | 9 | #include "core/frontend/emu_window.h" |
| 9 | #include "yuzu_cmd/emu_window/emu_window_sdl2.h" | 10 | #include "yuzu_cmd/emu_window/emu_window_sdl2.h" |
| 10 | 11 | ||
| 12 | namespace Core { | ||
| 13 | class System; | ||
| 14 | } | ||
| 15 | |||
| 11 | class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { | 16 | class EmuWindow_SDL2_VK final : public EmuWindow_SDL2 { |
| 12 | public: | 17 | public: |
| 13 | explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen); | 18 | explicit EmuWindow_SDL2_VK(Core::System& system, bool fullscreen); |
| 14 | ~EmuWindow_SDL2_VK(); | 19 | ~EmuWindow_SDL2_VK(); |
| 15 | 20 | ||
| 16 | void Present() override; | 21 | void Present() override; |
| 17 | void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | ||
| 18 | void* surface) const override; | ||
| 19 | 22 | ||
| 20 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; | 23 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; |
| 21 | |||
| 22 | private: | ||
| 23 | bool UseStandardLayers(PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr) const; | ||
| 24 | |||
| 25 | VkInstance vk_instance{}; | ||
| 26 | VkSurfaceKHR vk_surface{}; | ||
| 27 | |||
| 28 | PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; | ||
| 29 | PFN_vkDestroyInstance vkDestroyInstance{}; | ||
| 30 | }; | 24 | }; |
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp index a837430cc..8584f6671 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.cpp | |||
| @@ -116,10 +116,6 @@ bool EmuWindow_SDL2_Hide::IsShown() const { | |||
| 116 | return false; | 116 | return false; |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | void EmuWindow_SDL2_Hide::RetrieveVulkanHandlers(void*, void*, void*) const { | ||
| 120 | UNREACHABLE(); | ||
| 121 | } | ||
| 122 | |||
| 123 | class SDLGLContext : public Core::Frontend::GraphicsContext { | 119 | class SDLGLContext : public Core::Frontend::GraphicsContext { |
| 124 | public: | 120 | public: |
| 125 | explicit SDLGLContext() { | 121 | explicit SDLGLContext() { |
diff --git a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h index 9f5d04fca..c13a82df2 100644 --- a/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h +++ b/src/yuzu_tester/emu_window/emu_window_sdl2_hide.h | |||
| @@ -19,10 +19,6 @@ public: | |||
| 19 | /// Whether the screen is being shown or not. | 19 | /// Whether the screen is being shown or not. |
| 20 | bool IsShown() const override; | 20 | bool IsShown() const override; |
| 21 | 21 | ||
| 22 | /// Retrieves Vulkan specific handlers from the window | ||
| 23 | void RetrieveVulkanHandlers(void* get_instance_proc_addr, void* instance, | ||
| 24 | void* surface) const override; | ||
| 25 | |||
| 26 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; | 22 | std::unique_ptr<Core::Frontend::GraphicsContext> CreateSharedContext() const override; |
| 27 | 23 | ||
| 28 | private: | 24 | private: |