116 files changed, 6159 insertions(+), 1856 deletions(-)
diff --git a/.gitmodules b/.gitmodules
index bf3b80d59..9ba8fe207 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,6 +13,9 @@
 [submodule "soundtouch"]
     path = externals/soundtouch
     url = https://github.com/citra-emu/ext-soundtouch.git
+[submodule "libressl"]
+    path = externals/libressl
+    url = https://github.com/citra-emu/ext-libressl-portable.git
 [submodule "discord-rpc"]
     path = externals/discord-rpc
     url = https://github.com/discordapp/discord-rpc.git
@@ -28,3 +31,6 @@
 [submodule "libzip"]
     path = externals/libzip/libzip
     url = https://github.com/nih-at/libzip.git
+[submodule "xbyak"]
+    path = externals/xbyak
+    url = https://github.com/herumi/xbyak.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 61321bf0a..b71071271 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.11)
+cmake_minimum_required(VERSION 3.15)
 
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
@@ -13,7 +13,7 @@ project(yuzu)
 option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
 
 option(ENABLE_QT "Enable the Qt frontend" ON)
-CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF)
+CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF)
 
 option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
 
@@ -152,7 +152,6 @@ macro(yuzu_find_packages)
         "Boost 1.71 boost/1.72.0"
         "Catch2 2.11 catch2/2.11.0"
         "fmt 6.2 fmt/6.2.0"
-        "OpenSSL 1.1 openssl/1.1.1f"
         # can't use until https://github.com/bincrafters/community/issues/1173
         #"libzip 1.5 libzip/1.5.2@bincrafters/stable"
         "lz4 1.8 lz4/1.9.2"
@@ -312,15 +311,6 @@ elseif (TARGET Boost::boost)
     add_library(boost ALIAS Boost::boost)
 endif()
 
-if (NOT TARGET OpenSSL::SSL)
-    set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
-    add_library(OpenSSL::SSL ALIAS OpenSSL::OpenSSL)
-endif()
-if (NOT TARGET OpenSSL::Crypto)
-    set_target_properties(OpenSSL::OpenSSL PROPERTIES IMPORTED_GLOBAL TRUE)
-    add_library(OpenSSL::Crypto ALIAS OpenSSL::OpenSSL)
-endif()
-
 if (TARGET sdl2::sdl2)
     # imported from the conan generated sdl2Config.cmake
     set_target_properties(sdl2::sdl2 PROPERTIES IMPORTED_GLOBAL TRUE)
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 83e4e9df2..311ba1c2e 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -51,6 +51,8 @@ endif()
 # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
 set(VIDEO_CORE "${SRC_DIR}/src/video_core")
 set(HASH_FILES
+    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
+    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
     "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
     "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
     "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 0b40cd1b0..b80b27605 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -4,6 +4,13 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMakeModules")
 list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/externals/find-modules")
 include(DownloadExternals)
 
+# xbyak
+if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
+    add_library(xbyak INTERFACE)
+    target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
+    target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
+endif()
+
 # Catch
 add_library(catch-single-include INTERFACE)
 target_include_directories(catch-single-include INTERFACE catch/single_include)
@@ -66,6 +73,15 @@ if (NOT LIBZIP_FOUND)
 endif()
 
 if (ENABLE_WEB_SERVICE)
+    # LibreSSL
+    set(LIBRESSL_SKIP_INSTALL ON CACHE BOOL "")
+    add_subdirectory(libressl EXCLUDE_FROM_ALL)
+    target_include_directories(ssl INTERFACE ./libressl/include)
+    target_compile_definitions(ssl PRIVATE -DHAVE_INET_NTOP)
+    get_directory_property(OPENSSL_LIBRARIES
+        DIRECTORY libressl
+        DEFINITION OPENSSL_LIBS)
+
     # lurlparser
     add_subdirectory(lurlparser EXCLUDE_FROM_ALL)
 
@@ -73,5 +89,5 @@ if (ENABLE_WEB_SERVICE)
     add_library(httplib INTERFACE)
     target_include_directories(httplib INTERFACE ./httplib)
     target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
-    target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
+    target_link_libraries(httplib INTERFACE ${OPENSSL_LIBRARIES})
 endif()
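With LibreSSL now supplying libssl/libcrypto through the OPENSSL_LIBRARIES variable, any consumer of the httplib INTERFACE target gets TLS via CPPHTTPLIB_OPENSSL_SUPPORT without a system OpenSSL. A minimal sketch of what that enables, assuming cpp-httplib's SSLClient API of this era; the host and path are placeholders, not from the commit:

    #include <httplib.h> // built with CPPHTTPLIB_OPENSSL_SUPPORT

    int main() {
        // The TLS handshake is backed by the bundled LibreSSL.
        httplib::SSLClient client("api.example.org", 443);
        const auto response = client.Get("/status");
        return (response && response->status == 200) ? 0 : 1;
    }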
diff --git a/externals/libressl b/externals/libressl
new file mode 160000
+Subproject 7d01cb01cb1a926ecb4c9c98b107ef3c26f59df
diff --git a/externals/sirit b/externals/sirit
-Subproject 414fc4dbd28d8fe48f735a0c389db8a234f733c
+Subproject eefca56afd49379bdebc97ded8b480839f93088
diff --git a/externals/xbyak b/externals/xbyak
new file mode 160000
+Subproject 82b70e665918efc2ee348091742fd0237b3b68c
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e6769a5f3..0a3e2f4d1 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -32,6 +32,8 @@ add_custom_command(OUTPUT scm_rev.cpp
     DEPENDS
         # WARNING! It was too much work to try and make a common location for this list,
         # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
+        "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
+        "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
         "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
         "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
         "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
@@ -123,6 +125,8 @@ add_library(common STATIC
     lz4_compression.cpp
     lz4_compression.h
     math_util.h
+    memory_detect.cpp
+    memory_detect.h
     memory_hook.cpp
     memory_hook.h
     microprofile.cpp
@@ -169,10 +173,12 @@ if(ARCHITECTURE_x86_64)
         PRIVATE
             x64/cpu_detect.cpp
             x64/cpu_detect.h
+            x64/xbyak_abi.h
+            x64/xbyak_util.h
     )
 endif()
 
 create_target_directory_groups(common)
 
 target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile)
-target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd)
+target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp
new file mode 100644
index 000000000..3fdc309a2
--- /dev/null
+++ b/src/common/memory_detect.cpp
@@ -0,0 +1,60 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#ifdef _WIN32
+// clang-format off
+#include <windows.h>
+#include <sysinfoapi.h>
+// clang-format on
+#else
+#include <sys/types.h>
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+#else
+#include <sys/sysinfo.h>
+#endif
+#endif
+
+#include "common/memory_detect.h"
+
+namespace Common {
+
+// Detects the RAM and Swapfile sizes
+static MemoryInfo Detect() {
+    MemoryInfo mem_info{};
+
+#ifdef _WIN32
+    MEMORYSTATUSEX memorystatus;
+    memorystatus.dwLength = sizeof(memorystatus);
+    GlobalMemoryStatusEx(&memorystatus);
+    mem_info.TotalPhysicalMemory = memorystatus.ullTotalPhys;
+    mem_info.TotalSwapMemory = memorystatus.ullTotalPageFile - mem_info.TotalPhysicalMemory;
+#elif defined(__APPLE__)
+    u64 ramsize;
+    struct xsw_usage vmusage;
+    std::size_t sizeof_ramsize = sizeof(ramsize);
+    std::size_t sizeof_vmusage = sizeof(vmusage);
+    // hw and vm are defined in sysctl.h
+    // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471
+    // sysctlbyname(const char *, void *, size_t *, void *, size_t);
+    sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0);
+    sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0);
+    mem_info.TotalPhysicalMemory = ramsize;
+    mem_info.TotalSwapMemory = vmusage.xsu_total;
+#else
+    struct sysinfo meminfo;
+    sysinfo(&meminfo);
+    mem_info.TotalPhysicalMemory = meminfo.totalram;
+    mem_info.TotalSwapMemory = meminfo.totalswap;
+#endif
+
+    return mem_info;
+}
+
+const MemoryInfo& GetMemInfo() {
+    static MemoryInfo mem_info = Detect();
+    return mem_info;
+}
+
+} // namespace Common
\ No newline at end of file
diff --git a/src/common/memory_detect.h b/src/common/memory_detect.h
new file mode 100644
index 000000000..a73c0f3f4
--- /dev/null
+++ b/src/common/memory_detect.h
@@ -0,0 +1,22 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Common {
+
+struct MemoryInfo {
+    u64 TotalPhysicalMemory{};
+    u64 TotalSwapMemory{};
+};
+
+/**
+ * Gets the memory info of the host system
+ * @return Reference to a MemoryInfo struct with the physical and swap memory sizes in bytes
+ */
+const MemoryInfo& GetMemInfo();
+
+} // namespace Common
\ No newline at end of file
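A short usage sketch for the new API; the logging call and function name are illustrative, not from the commit:

    #include "common/logging/log.h"
    #include "common/memory_detect.h"

    void LogHostMemory() {
        // Detect() runs once on first use; later calls return the cached result.
        const auto& mem_info = Common::GetMemInfo();
        LOG_INFO(Common, "Host RAM: {} MiB, swap: {} MiB",
                 mem_info.TotalPhysicalMemory / (1024 * 1024),
                 mem_info.TotalSwapMemory / (1024 * 1024));
    }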
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..794da8a52
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,260 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <bitset>
+#include <initializer_list>
+#include <xbyak.h>
+#include "common/assert.h"
+
+namespace Common::X64 {
+
+inline int RegToIndex(const Xbyak::Reg& reg) {
+    using Kind = Xbyak::Reg::Kind;
+    ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
+               "RegSet only support GPRs and XMM registers.");
+    ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XMM0-15.");
+    return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
+}
+
+inline Xbyak::Reg64 IndexToReg64(int reg_index) {
+    ASSERT(reg_index < 16);
+    return Xbyak::Reg64(reg_index);
+}
+
+inline Xbyak::Xmm IndexToXmm(int reg_index) {
+    ASSERT(reg_index >= 16 && reg_index < 32);
+    return Xbyak::Xmm(reg_index - 16);
+}
+
+inline Xbyak::Reg IndexToReg(int reg_index) {
+    if (reg_index < 16) {
+        return IndexToReg64(reg_index);
+    } else {
+        return IndexToXmm(reg_index);
+    }
+}
+
+inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
+    std::bitset<32> bits;
+    for (const Xbyak::Reg& reg : regs) {
+        bits[RegToIndex(reg)] = true;
+    }
+    return bits;
+}
+
+const std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
+const std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
+
+#ifdef _WIN32
+
+// Microsoft x64 ABI
+const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
+const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
+const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
+const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
+const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
+
+const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+    // GPRs
+    Xbyak::util::rcx,
+    Xbyak::util::rdx,
+    Xbyak::util::r8,
+    Xbyak::util::r9,
+    Xbyak::util::r10,
+    Xbyak::util::r11,
+    // XMMs
+    Xbyak::util::xmm0,
+    Xbyak::util::xmm1,
+    Xbyak::util::xmm2,
+    Xbyak::util::xmm3,
+    Xbyak::util::xmm4,
+    Xbyak::util::xmm5,
+});
+
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+    // GPRs
+    Xbyak::util::rbx,
+    Xbyak::util::rsi,
+    Xbyak::util::rdi,
+    Xbyak::util::rbp,
+    Xbyak::util::r12,
+    Xbyak::util::r13,
+    Xbyak::util::r14,
+    Xbyak::util::r15,
+    // XMMs
+    Xbyak::util::xmm6,
+    Xbyak::util::xmm7,
+    Xbyak::util::xmm8,
+    Xbyak::util::xmm9,
+    Xbyak::util::xmm10,
+    Xbyak::util::xmm11,
+    Xbyak::util::xmm12,
+    Xbyak::util::xmm13,
+    Xbyak::util::xmm14,
+    Xbyak::util::xmm15,
+});
+
+constexpr size_t ABI_SHADOW_SPACE = 0x20;
+
+#else
+
+// System V x86-64 ABI
+const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
+const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
+const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
+const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
+const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
+
+const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
+    // GPRs
+    Xbyak::util::rcx,
+    Xbyak::util::rdx,
+    Xbyak::util::rdi,
+    Xbyak::util::rsi,
+    Xbyak::util::r8,
+    Xbyak::util::r9,
+    Xbyak::util::r10,
+    Xbyak::util::r11,
+    // XMMs
+    Xbyak::util::xmm0,
+    Xbyak::util::xmm1,
+    Xbyak::util::xmm2,
+    Xbyak::util::xmm3,
+    Xbyak::util::xmm4,
+    Xbyak::util::xmm5,
+    Xbyak::util::xmm6,
+    Xbyak::util::xmm7,
+    Xbyak::util::xmm8,
+    Xbyak::util::xmm9,
+    Xbyak::util::xmm10,
+    Xbyak::util::xmm11,
+    Xbyak::util::xmm12,
+    Xbyak::util::xmm13,
+    Xbyak::util::xmm14,
+    Xbyak::util::xmm15,
+});
+
+const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
+    // GPRs
+    Xbyak::util::rbx,
+    Xbyak::util::rbp,
+    Xbyak::util::r12,
+    Xbyak::util::r13,
+    Xbyak::util::r14,
+    Xbyak::util::r15,
+});
+
+constexpr size_t ABI_SHADOW_SPACE = 0;
+
+#endif
+
+inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
+                                   size_t needed_frame_size, s32* out_subtraction,
+                                   s32* out_xmm_offset) {
+    const auto count = (regs & ABI_ALL_GPRS).count();
+    rsp_alignment -= count * 8;
+    size_t subtraction = 0;
+    const auto xmm_count = (regs & ABI_ALL_XMMS).count();
+    if (xmm_count) {
+        // If we have any XMMs to save, we must align the stack here.
+        subtraction = rsp_alignment & 0xF;
+    }
+    subtraction += 0x10 * xmm_count;
+    size_t xmm_base_subtraction = subtraction;
+    subtraction += needed_frame_size;
+    subtraction += ABI_SHADOW_SPACE;
+    // Final alignment.
+    rsp_alignment -= subtraction;
+    subtraction += rsp_alignment & 0xF;
+
+    *out_subtraction = (s32)subtraction;
+    *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
+}
+
+inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+                                              size_t rsp_alignment, size_t needed_frame_size = 0) {
+    s32 subtraction, xmm_offset;
+    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+    for (std::size_t i = 0; i < regs.size(); ++i) {
+        if (regs[i] && ABI_ALL_GPRS[i]) {
+            code.push(IndexToReg64(static_cast<int>(i)));
+        }
+    }
+    if (subtraction != 0) {
+        code.sub(code.rsp, subtraction);
+    }
+
+    for (std::size_t i = 0; i < regs.size(); ++i) {
+        if (regs[i] && ABI_ALL_XMMS[i]) {
+            code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
+            xmm_offset += 0x10;
+        }
+    }
+
+    return ABI_SHADOW_SPACE;
+}
+
+inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+                                           size_t rsp_alignment, size_t needed_frame_size = 0) {
+    s32 subtraction, xmm_offset;
+    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+
+    for (std::size_t i = 0; i < regs.size(); ++i) {
+        if (regs[i] && ABI_ALL_XMMS[i]) {
+            code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
+            xmm_offset += 0x10;
+        }
+    }
+
+    if (subtraction != 0) {
+        code.add(code.rsp, subtraction);
+    }
+
+    // GPRs need to be popped in reverse order
+    for (int i = 15; i >= 0; i--) {
+        if (regs[i]) {
+            code.pop(IndexToReg64(i));
+        }
+    }
+}
+
+inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+                                                 size_t rsp_alignment,
+                                                 size_t needed_frame_size = 0) {
+    s32 subtraction, xmm_offset;
+    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+
+    for (std::size_t i = 0; i < regs.size(); ++i) {
+        if (regs[i] && ABI_ALL_GPRS[i]) {
+            code.push(IndexToReg64(static_cast<int>(i)));
+        }
+    }
+
+    if (subtraction != 0) {
+        code.sub(code.rsp, subtraction);
+    }
+
+    return ABI_SHADOW_SPACE;
+}
+
+inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
+                                              size_t rsp_alignment, size_t needed_frame_size = 0) {
+    s32 subtraction, xmm_offset;
+    ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
+
+    if (subtraction != 0) {
+        code.add(code.rsp, subtraction);
+    }
+
+    // GPRs need to be popped in reverse order
+    for (int i = 15; i >= 0; i--) {
+        if (regs[i]) {
+            code.pop(IndexToReg64(i));
+        }
+    }
+}
+
+} // namespace Common::X64
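For orientation, a hedged sketch of how an emitter would typically use these helpers; the register set, body, and the rsp_alignment value of 8 (accounting for the return address already on the stack) are illustrative, mirroring how the macro JIT added in this commit calls them:

    #include <xbyak.h>
    #include "common/x64/xbyak_abi.h"

    void EmitExample(Xbyak::CodeGenerator& code) {
        using namespace Common::X64;
        // Save every register the body clobbers: GPRs are pushed, XMMs are
        // spilled to an aligned stack slot, and rsp stays 16-byte aligned.
        const std::bitset<32> used = BuildRegSet({Xbyak::util::rbx, Xbyak::util::xmm6});
        ABI_PushRegistersAndAdjustStack(code, used, 8);
        code.mov(Xbyak::util::rbx, 42); // ... body using the saved registers ...
        ABI_PopRegistersAndAdjustStack(code, used, 8);
        code.ret();
    }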
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..df17f8cbe
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,47 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include <xbyak.h>
+#include "common/x64/xbyak_abi.h"
+
+namespace Common::X64 {
+
+// Constants for use with cmpps/cmpss
+enum {
+    CMP_EQ = 0,
+    CMP_LT = 1,
+    CMP_LE = 2,
+    CMP_UNORD = 3,
+    CMP_NEQ = 4,
+    CMP_NLT = 5,
+    CMP_NLE = 6,
+    CMP_ORD = 7,
+};
+
+constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
+    const u64 distance = target - (ref + 5);
+    return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
+}
+
+inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
+    return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
+}
+
+template <typename T>
+inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
+    static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
+    size_t addr = reinterpret_cast<size_t>(f);
+    if (IsWithin2G(code, addr)) {
+        code.call(f);
+    } else {
+        // ABI_RETURN is a safe temp register to use before a call
+        code.mov(ABI_RETURN, addr);
+        code.call(ABI_RETURN);
+    }
+}
+
+} // namespace Common::X64
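A hedged sketch of the far-call helper in use; HostCallback is a stand-in host function, not part of the commit:

    #include "common/common_types.h"
    #include "common/x64/xbyak_util.h"

    static u64 HostCallback(u64 value) {
        return value * 2;
    }

    void EmitHostCall(Xbyak::CodeGenerator& code) {
        // Emits a direct rel32 call when HostCallback is within +/-2 GiB of
        // the code buffer, otherwise an indirect call through ABI_RETURN (rax).
        Common::X64::CallFarFunction(code, &HostCallback);
    }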
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index b93aa6935..c47ff863e 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -10,6 +10,7 @@
 #include "common/file_util.h"
 #include "common/hex_util.h"
 #include "common/logging/log.h"
+#include "common/string_util.h"
 #include "core/core.h"
 #include "core/file_sys/content_archive.h"
 #include "core/file_sys/control_metadata.h"
@@ -48,6 +49,23 @@ std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
     return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]);
 }
 
+std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
+                                                       std::string_view name) {
+#ifdef _WIN32
+    return dir->GetSubdirectory(name);
+#else
+    const auto subdirs = dir->GetSubdirectories();
+    for (const auto& subdir : subdirs) {
+        std::string dir_name = Common::ToLower(subdir->GetName());
+        if (dir_name == name) {
+            return subdir;
+        }
+    }
+
+    return nullptr;
+#endif
+}
+
 PatchManager::PatchManager(u64 title_id) : title_id(title_id) {}
 
 PatchManager::~PatchManager() = default;
@@ -104,7 +122,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
         if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end())
             continue;
 
-        auto exefs_dir = subdir->GetSubdirectory("exefs");
+        auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
         if (exefs_dir != nullptr)
             layers.push_back(std::move(exefs_dir));
     }
@@ -130,7 +148,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
         if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend())
             continue;
 
-        auto exefs_dir = subdir->GetSubdirectory("exefs");
+        auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
         if (exefs_dir != nullptr) {
             for (const auto& file : exefs_dir->GetFiles()) {
                 if (file->GetExtension() == "ips") {
@@ -295,7 +313,7 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
             continue;
         }
 
-        auto cheats_dir = subdir->GetSubdirectory("cheats");
+        auto cheats_dir = FindSubdirectoryCaseless(subdir, "cheats");
         if (cheats_dir != nullptr) {
             auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
             if (res.has_value()) {
@@ -340,11 +358,11 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
             continue;
         }
 
-        auto romfs_dir = subdir->GetSubdirectory("romfs");
+        auto romfs_dir = FindSubdirectoryCaseless(subdir, "romfs");
         if (romfs_dir != nullptr)
             layers.push_back(std::move(romfs_dir));
 
-        auto ext_dir = subdir->GetSubdirectory("romfs_ext");
+        auto ext_dir = FindSubdirectoryCaseless(subdir, "romfs_ext");
         if (ext_dir != nullptr)
             layers_ext.push_back(std::move(ext_dir));
     }
@@ -470,7 +488,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
     for (const auto& mod : mod_dir->GetSubdirectories()) {
         std::string types;
 
-        const auto exefs_dir = mod->GetSubdirectory("exefs");
+        const auto exefs_dir = FindSubdirectoryCaseless(mod, "exefs");
         if (IsDirValidAndNonEmpty(exefs_dir)) {
             bool ips = false;
             bool ipswitch = false;
@@ -494,9 +512,9 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
             if (layeredfs)
                 AppendCommaIfNotEmpty(types, "LayeredExeFS");
         }
-        if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs")))
+        if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "romfs")))
            AppendCommaIfNotEmpty(types, "LayeredFS");
-        if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats")))
+        if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "cheats")))
             AppendCommaIfNotEmpty(types, "Cheats");
 
         if (types.empty())
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index ec6db524d..f4cb918dd 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -29,6 +29,11 @@ enum class TitleVersionFormat : u8 {
 std::string FormatTitleVersion(u32 version,
                                TitleVersionFormat format = TitleVersionFormat::ThreeElements);
 
+// Returns a directory with name matching name case-insensitive. Returns nullptr if directory
+// doesn't have a directory with name.
+std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
+                                                       std::string_view name);
+
 // A centralized class to manage patches to games.
 class PatchManager {
 public:
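Note that the non-Windows branch lower-cases only the on-disk directory name, so callers are expected to pass an already-lowercase needle, as every call site in this commit does. A hedged usage sketch; the wrapper function is illustrative:

    #include "core/file_sys/patch_manager.h"
    #include "core/file_sys/vfs.h"

    // Matches "cheats", "Cheats", "CHEATS", ...; returns nullptr when absent.
    // Assumes `mod` came from VfsDirectory::GetSubdirectories().
    std::shared_ptr<FileSys::VfsDirectory> FindCheatsDir(
        std::shared_ptr<FileSys::VfsDirectory> mod) {
        return FileSys::FindSubdirectoryCaseless(std::move(mod), "cheats");
    }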
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 36724569f..c4c5199b1 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -132,7 +132,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {
 
 u64 Process::GetTotalPhysicalMemoryAvailable() const {
     const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
-                       page_table->GetTotalHeapSize() + image_size + main_thread_stack_size};
+                       page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
+                       main_thread_stack_size};
 
     if (capacity < memory_usage_capacity) {
         return capacity;
@@ -146,7 +147,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
 }
 
 u64 Process::GetTotalPhysicalMemoryUsed() const {
-    return image_size + main_thread_stack_size + page_table->GetTotalHeapSize();
+    return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
+           GetSystemResourceSize();
 }
 
 u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
diff --git a/src/core/hle/kernel/readable_event.cpp b/src/core/hle/kernel/readable_event.cpp
index 00860fcbd..ef5e19e63 100644
--- a/src/core/hle/kernel/readable_event.cpp
+++ b/src/core/hle/kernel/readable_event.cpp
@@ -38,7 +38,7 @@ void ReadableEvent::Clear() {
 
 ResultCode ReadableEvent::Reset() {
     if (!is_signaled) {
-        LOG_ERROR(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
+        LOG_TRACE(Kernel, "Handle is not signaled! object_id={}, object_type={}, object_name={}",
                   GetObjectId(), GetTypeName(), GetName());
         return ERR_INVALID_STATE;
     }
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index d9beaa3a4..212e442f4 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
     const std::size_t index{ResourceTypeToIndex(resource)};
 
     s64 new_value = current[index] + amount;
-    while (new_value > limit[index] && available[index] + amount <= limit[index]) {
+    if (new_value > limit[index] && available[index] + amount <= limit[index]) {
         // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
         new_value = current[index] + amount;
-
-        if (timeout >= 0) {
-            break;
-        }
     }
 
     if (new_value <= limit[index]) {
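The effect of the change is that Reserve() now evaluates the over-limit condition once and falls through, instead of spinning until the timeout. A hedged sketch of the resulting non-blocking semantics; the resource type and wrapper are illustrative:

    #include "core/hle/kernel/resource_limit.h"

    // Over-limit reservations now fail fast rather than busy-waiting for
    // `timeout` to elapse.
    bool TryReserveThread(Kernel::ResourceLimit& limit) {
        return limit.Reserve(Kernel::ResourceType::Threads, 1, 0);
    }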
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index c84cb1483..72a050de2 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -161,7 +161,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
         {40, nullptr, "AcquireXpadIdEventHandle"},
         {41, nullptr, "ReleaseXpadIdEventHandle"},
         {51, &Hid::ActivateXpad, "ActivateXpad"},
-        {55, nullptr, "GetXpadIds"},
+        {55, &Hid::GetXpadIDs, "GetXpadIds"},
         {56, nullptr, "ActivateJoyXpad"},
         {58, nullptr, "GetJoyXpadLifoHandle"},
         {59, nullptr, "GetJoyXpadIds"},
@@ -319,6 +319,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) {
     rb.Push(RESULT_SUCCESS);
 }
 
+void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) {
+    IPC::RequestParser rp{ctx};
+    const auto applet_resource_user_id{rp.Pop<u64>()};
+
+    LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id);
+
+    IPC::ResponseBuilder rb{ctx, 3};
+    rb.Push(RESULT_SUCCESS);
+    rb.Push(0);
+}
+
 void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) {
     IPC::RequestParser rp{ctx};
     const auto applet_resource_user_id{rp.Pop<u64>()};
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index c8ed4ad8b..d481a75f8 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -86,6 +86,7 @@ public:
 private:
     void CreateAppletResource(Kernel::HLERequestContext& ctx);
     void ActivateXpad(Kernel::HLERequestContext& ctx);
+    void GetXpadIDs(Kernel::HLERequestContext& ctx);
     void ActivateDebugPad(Kernel::HLERequestContext& ctx);
     void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
     void ActivateMouse(Kernel::HLERequestContext& ctx);
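Wiring a new HLE command follows the three-step pattern shown above: point the function-table entry at a handler, implement it, and declare it in the header. A hedged sketch of the same stub shape for a hypothetical command; the name and payload are illustrative, not from this commit:

    void Hid::GetSomethingIds(Kernel::HLERequestContext& ctx) {
        IPC::RequestParser rp{ctx};
        const auto applet_resource_user_id{rp.Pop<u64>()};

        LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}",
                  applet_resource_user_id);

        IPC::ResponseBuilder rb{ctx, 3};
        rb.Push(RESULT_SUCCESS);
        rb.Push(0); // element count: report zero devices
    }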
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 767158444..01ddcdbd6 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -177,7 +177,8 @@ private:
     void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) {
         LOG_DEBUG(Service_NIFM, "called");
 
-        ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, "NetworkProfileData is not the correct size");
+        ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c,
+                   "SfNetworkProfileData is not the correct size");
         u128 uuid{};
         auto buffer = ctx.ReadBuffer();
         std::memcpy(&uuid, buffer.data() + 8, sizeof(u128));
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index cc2192e5c..0d913334e 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -25,7 +25,7 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input,
     case IoctlCommand::IocGetCharacteristicsCommand:
         return GetCharacteristics(input, output, output2, version);
     case IoctlCommand::IocGetTPCMasksCommand:
-        return GetTPCMasks(input, output);
+        return GetTPCMasks(input, output, output2, version);
     case IoctlCommand::IocGetActiveSlotMaskCommand:
         return GetActiveSlotMask(input, output);
     case IoctlCommand::IocZcullGetCtxSizeCommand:
@@ -98,17 +98,22 @@ u32 nvhost_ctrl_gpu::GetCharacteristics(const std::vector<u8>& input, std::vecto
     return 0;
 }
 
-u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output) {
+u32 nvhost_ctrl_gpu::GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output,
+                                 std::vector<u8>& output2, IoctlVersion version) {
     IoctlGpuGetTpcMasksArgs params{};
     std::memcpy(&params, input.data(), input.size());
-    LOG_INFO(Service_NVDRV, "called, mask=0x{:X}, mask_buf_addr=0x{:X}", params.mask_buf_size,
-             params.mask_buf_addr);
-    // TODO(ogniK): Confirm value on hardware
-    if (params.mask_buf_size)
-        params.tpc_mask_size = 4 * 1; // 4 * num_gpc
-    else
-        params.tpc_mask_size = 0;
-    std::memcpy(output.data(), &params, sizeof(params));
+    LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
+    if (params.mask_buffer_size != 0) {
+        params.tcp_mask = 3;
+    }
+
+    if (version == IoctlVersion::Version3) {
+        std::memcpy(output.data(), input.data(), output.size());
+        std::memcpy(output2.data(), &params.tcp_mask, output2.size());
+    } else {
+        std::memcpy(output.data(), &params, output.size());
+    }
+
     return 0;
 }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 07b644ec5..ef60f72ce 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -92,16 +92,11 @@ private:
                   "IoctlCharacteristics is incorrect size");
 
     struct IoctlGpuGetTpcMasksArgs {
-        /// [in]  TPC mask buffer size reserved by userspace. Should be at least
-        /// sizeof(__u32) * fls(gpc_mask) to receive TPC mask for each GPC.
-        /// [out] full kernel buffer size
-        u32_le mask_buf_size;
-        u32_le reserved;
-
-        /// [in]  pointer to TPC mask buffer. It will receive one 32-bit TPC mask per GPC or 0 if
-        /// GPC is not enabled or not present. This parameter is ignored if mask_buf_size is 0.
-        u64_le mask_buf_addr;
-        u64_le tpc_mask_size; // Nintendo add this?
+        u32_le mask_buffer_size{};
+        INSERT_PADDING_WORDS(1);
+        u64_le mask_buffer_address{};
+        u32_le tcp_mask{};
+        INSERT_PADDING_WORDS(1);
     };
     static_assert(sizeof(IoctlGpuGetTpcMasksArgs) == 24,
                   "IoctlGpuGetTpcMasksArgs is incorrect size");
@@ -166,7 +161,8 @@ private:
 
     u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output,
                            std::vector<u8>& output2, IoctlVersion version);
-    u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
+    u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output, std::vector<u8>& output2,
+                    IoctlVersion version);
    u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
     u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
     u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
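The rewritten 24-byte struct can be sanity-checked in isolation; a hedged mirror using plain fixed-width integers in place of yuzu's u32_le/u64_le wrappers and INSERT_PADDING_WORDS:

    #include <cstdint>

    // Layout-checking mirror only; assumes a little-endian host, as the
    // u32_le/u64_le wrappers do.
    struct TpcMasksArgsMirror {
        std::uint32_t mask_buffer_size;    // [in] bytes reserved by userspace
        std::uint32_t pad0;                // INSERT_PADDING_WORDS(1)
        std::uint64_t mask_buffer_address; // [in] pointer to the user buffer
        std::uint32_t tcp_mask;            // [out] mask value (3 in this stub)
        std::uint32_t pad1;                // INSERT_PADDING_WORDS(1)
    };
    static_assert(sizeof(TpcMasksArgsMirror) == 24, "must match the ioctl ABI");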
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index da53cde05..4edff9cd8 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -112,6 +112,7 @@ void LogSettings() {
     LogSetting("Renderer_UseAsynchronousGpuEmulation",
                Settings::values.use_asynchronous_gpu_emulation);
     LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
+    LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
     LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy);
     LogSetting("Audio_OutputEngine", Settings::values.sink_id);
     LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
diff --git a/src/core/settings.h b/src/core/settings.h
index c1266b341..33e1e06cd 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -437,7 +437,7 @@ struct Values {
     bool renderer_debug;
     int vulkan_device;
 
-    float resolution_factor;
+    u16 resolution_factor{1};
     int aspect_ratio;
     int max_anisotropy;
     bool use_frame_limit;
@@ -446,6 +446,7 @@ struct Values {
     GPUAccuracy gpu_accuracy;
     bool use_asynchronous_gpu_emulation;
     bool use_vsync;
+    bool use_assembly_shaders;
     bool force_30fps_mode;
     bool use_fast_gpu_time;
 
@@ -473,6 +474,7 @@ struct Values {
     bool reporting_services;
     bool quest_flag;
     bool disable_cpu_opt;
+    bool disable_macro_jit;
 
     // BCAT
     std::string bcat_backend;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 1c3b03a1c..c781b3cfc 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -201,6 +201,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
     AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
              Settings::values.use_asynchronous_gpu_emulation);
     AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
+    AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
     AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
 }
 
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d23c53843..099bb446e 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_library(video_core STATIC
     buffer_cache/buffer_block.h
     buffer_cache/buffer_cache.h
+    buffer_cache/map_interval.cpp
     buffer_cache/map_interval.h
     dirty_flags.cpp
     dirty_flags.h
@@ -24,6 +25,12 @@ add_library(video_core STATIC
     engines/shader_bytecode.h
     engines/shader_header.h
     engines/shader_type.h
+    macro/macro.cpp
+    macro/macro.h
+    macro/macro_interpreter.cpp
+    macro/macro_interpreter.h
+    macro/macro_jit_x64.cpp
+    macro/macro_jit_x64.h
     fence_manager.h
     gpu.cpp
     gpu.h
@@ -35,8 +42,6 @@ add_library(video_core STATIC
     gpu_thread.h
     guest_driver.cpp
     guest_driver.h
-    macro_interpreter.cpp
-    macro_interpreter.h
     memory_manager.cpp
     memory_manager.h
     morton.cpp
@@ -44,11 +49,11 @@ add_library(video_core STATIC
     query_cache.h
     rasterizer_accelerated.cpp
     rasterizer_accelerated.h
-    rasterizer_cache.cpp
-    rasterizer_cache.h
     rasterizer_interface.h
     renderer_base.cpp
     renderer_base.h
+    renderer_opengl/gl_arb_decompiler.cpp
+    renderer_opengl/gl_arb_decompiler.h
     renderer_opengl/gl_buffer_cache.cpp
     renderer_opengl/gl_buffer_cache.h
     renderer_opengl/gl_device.cpp
@@ -88,6 +93,7 @@ add_library(video_core STATIC
     renderer_opengl/utils.h
     sampler_cache.cpp
     sampler_cache.h
+    shader_cache.h
     shader/decode/arithmetic.cpp
     shader/decode/arithmetic_immediate.cpp
     shader/decode/bfe.cpp
@@ -228,7 +234,7 @@ endif()
 create_target_directory_groups(video_core)
 
 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PRIVATE glad)
+target_link_libraries(video_core PRIVATE glad xbyak)
 
 if (ENABLE_VULKAN)
     target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index e35ee0b67..e64170e66 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,48 +15,47 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const VAddr start, const VAddr end) const {
+    bool Overlaps(VAddr start, VAddr end) const {
         return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+    bool IsInside(VAddr other_start, VAddr other_end) const {
         return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }
 
-    std::size_t GetOffset(const VAddr in_addr) {
+    std::size_t Offset(VAddr in_addr) const {
         return static_cast<std::size_t>(in_addr - cpu_addr);
     }
 
-    VAddr GetCpuAddr() const {
+    VAddr CpuAddr() const {
         return cpu_addr;
     }
 
-    VAddr GetCpuAddrEnd() const {
+    VAddr CpuAddrEnd() const {
         return cpu_addr_end;
     }
 
-    void SetCpuAddr(const VAddr new_addr) {
+    void SetCpuAddr(VAddr new_addr) {
         cpu_addr = new_addr;
         cpu_addr_end = new_addr + size;
     }
 
-    std::size_t GetSize() const {
+    std::size_t Size() const {
         return size;
     }
 
-    void SetEpoch(u64 new_epoch) {
-        epoch = new_epoch;
+    u64 Epoch() const {
+        return epoch;
     }
 
-    u64 GetEpoch() {
-        return epoch;
+    void SetEpoch(u64 new_epoch) {
+        epoch = new_epoch;
     }
 
 protected:
-    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
-        SetCpuAddr(cpu_addr);
+    explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} {
+        SetCpuAddr(cpu_addr_);
     }
-    ~BufferBlock() = default;
 
 private:
     VAddr cpu_addr{};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 56e570994..308d8b55f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -12,11 +12,12 @@ | |||
| 12 | #include <utility> | 12 | #include <utility> |
| 13 | #include <vector> | 13 | #include <vector> |
| 14 | 14 | ||
| 15 | #include <boost/icl/interval_map.hpp> | 15 | #include <boost/container/small_vector.hpp> |
| 16 | #include <boost/icl/interval_set.hpp> | 16 | #include <boost/icl/interval_set.hpp> |
| 17 | #include <boost/range/iterator_range.hpp> | 17 | #include <boost/intrusive/set.hpp> |
| 18 | 18 | ||
| 19 | #include "common/alignment.h" | 19 | #include "common/alignment.h" |
| 20 | #include "common/assert.h" | ||
| 20 | #include "common/common_types.h" | 21 | #include "common/common_types.h" |
| 21 | #include "common/logging/log.h" | 22 | #include "common/logging/log.h" |
| 22 | #include "core/core.h" | 23 | #include "core/core.h" |
| @@ -29,10 +30,16 @@ | |||
| 29 | 30 | ||
| 30 | namespace VideoCommon { | 31 | namespace VideoCommon { |
| 31 | 32 | ||
| 32 | using MapInterval = std::shared_ptr<MapIntervalBase>; | 33 | template <typename Buffer, typename BufferType, typename StreamBuffer> |
| 33 | |||
| 34 | template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> | ||
| 35 | class BufferCache { | 34 | class BufferCache { |
| 35 | using IntervalSet = boost::icl::interval_set<VAddr>; | ||
| 36 | using IntervalType = typename IntervalSet::interval_type; | ||
| 37 | using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; | ||
| 38 | |||
| 39 | static constexpr u64 WRITE_PAGE_BIT = 11; | ||
| 40 | static constexpr u64 BLOCK_PAGE_BITS = 21; | ||
| 41 | static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; | ||
| 42 | |||
| 36 | public: | 43 | public: |
| 37 | using BufferInfo = std::pair<BufferType, u64>; | 44 | using BufferInfo = std::pair<BufferType, u64>; |
| 38 | 45 | ||
| @@ -40,14 +47,12 @@ public: | |||
| 40 | bool is_written = false, bool use_fast_cbuf = false) { | 47 | bool is_written = false, bool use_fast_cbuf = false) { |
| 41 | std::lock_guard lock{mutex}; | 48 | std::lock_guard lock{mutex}; |
| 42 | 49 | ||
| 43 | const std::optional<VAddr> cpu_addr_opt = | 50 | const auto& memory_manager = system.GPU().MemoryManager(); |
| 44 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | 51 | const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); |
| 45 | |||
| 46 | if (!cpu_addr_opt) { | 52 | if (!cpu_addr_opt) { |
| 47 | return {GetEmptyBuffer(size), 0}; | 53 | return {GetEmptyBuffer(size), 0}; |
| 48 | } | 54 | } |
| 49 | 55 | const VAddr cpu_addr = *cpu_addr_opt; | |
| 50 | VAddr cpu_addr = *cpu_addr_opt; | ||
| 51 | 56 | ||
| 52 | // Cache management is a big overhead, so only cache entries of at least a given size. | 57 | // Cache management is a big overhead, so only cache entries of at least a given size. |
| 53 | // TODO: Figure out which size is the best for given games. | 58 | // TODO: Figure out which size is the best for given games. |
| @@ -55,76 +60,91 @@ public: | |||
| 55 | if (use_fast_cbuf || size < max_stream_size) { | 60 | if (use_fast_cbuf || size < max_stream_size) { |
| 56 | if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { | 61 | if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { |
| 57 | auto& memory_manager = system.GPU().MemoryManager(); | 62 | auto& memory_manager = system.GPU().MemoryManager(); |
| 63 | const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); | ||
| 58 | if (use_fast_cbuf) { | 64 | if (use_fast_cbuf) { |
| 59 | if (memory_manager.IsGranularRange(gpu_addr, size)) { | 65 | u8* dest; |
| 60 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 66 | if (is_granular) { |
| 61 | return ConstBufferUpload(host_ptr, size); | 67 | dest = memory_manager.GetPointer(gpu_addr); |
| 62 | } else { | 68 | } else { |
| 63 | staging_buffer.resize(size); | 69 | staging_buffer.resize(size); |
| 64 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | 70 | dest = staging_buffer.data(); |
| 65 | return ConstBufferUpload(staging_buffer.data(), size); | 71 | memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); |
| 66 | } | 72 | } |
| 73 | return ConstBufferUpload(dest, size); | ||
| 74 | } | ||
| 75 | if (is_granular) { | ||
| 76 | u8* const host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 77 | return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { | ||
| 78 | std::memcpy(dest, host_ptr, size); | ||
| 79 | }); | ||
| 67 | } else { | 80 | } else { |
| 68 | if (memory_manager.IsGranularRange(gpu_addr, size)) { | 81 | return StreamBufferUpload( |
| 69 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 82 | size, alignment, [&memory_manager, gpu_addr, size](u8* dest) { |
| 70 | return StreamBufferUpload(host_ptr, size, alignment); | 83 | memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); |
| 71 | } else { | 84 | }); |
| 72 | staging_buffer.resize(size); | ||
| 73 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | ||
| 74 | return StreamBufferUpload(staging_buffer.data(), size, alignment); | ||
| 75 | } | ||
| 76 | } | 85 | } |
| 77 | } | 86 | } |
| 78 | } | 87 | } |
| 79 | 88 | ||
| 80 | auto block = GetBlock(cpu_addr, size); | 89 | Buffer* const block = GetBlock(cpu_addr, size); |
| 81 | auto map = MapAddress(block, gpu_addr, cpu_addr, size); | 90 | MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); |
| 91 | if (!map) { | ||
| 92 | return {GetEmptyBuffer(size), 0}; | ||
| 93 | } | ||
| 82 | if (is_written) { | 94 | if (is_written) { |
| 83 | map->MarkAsModified(true, GetModifiedTicks()); | 95 | map->MarkAsModified(true, GetModifiedTicks()); |
| 84 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | 96 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { |
| 85 | MarkForAsyncFlush(map); | 97 | MarkForAsyncFlush(map); |
| 86 | } | 98 | } |
| 87 | if (!map->IsWritten()) { | 99 | if (!map->is_written) { |
| 88 | map->MarkAsWritten(true); | 100 | map->is_written = true; |
| 89 | MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 101 | MarkRegionAsWritten(map->start, map->end - 1); |
| 90 | } | 102 | } |
| 91 | } | 103 | } |
| 92 | 104 | ||
| 93 | return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; | 105 | return {block->Handle(), static_cast<u64>(block->Offset(cpu_addr))}; |
| 94 | } | 106 | } |
| 95 | 107 | ||
| 96 | /// Uploads from host memory. Returns the OpenGL buffer where it's located and its offset. | 108 | /// Uploads from host memory. Returns the OpenGL buffer where it's located and its offset. |
| 97 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | 109 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, |
| 98 | std::size_t alignment = 4) { | 110 | std::size_t alignment = 4) { |
| 99 | std::lock_guard lock{mutex}; | 111 | std::lock_guard lock{mutex}; |
| 100 | return StreamBufferUpload(raw_pointer, size, alignment); | 112 | return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) { |
| 113 | std::memcpy(dest, raw_pointer, size); | ||
| 114 | }); | ||
| 101 | } | 115 | } |
| 102 | 116 | ||
| 103 | void Map(std::size_t max_size) { | 117 | /// Prepares the buffer cache for data uploading |
| 118 | /// @param max_size Maximum number of bytes that will be uploaded | ||
| 119 | /// @return True when a stream buffer invalidation was required, false otherwise | ||
| 120 | bool Map(std::size_t max_size) { | ||
| 104 | std::lock_guard lock{mutex}; | 121 | std::lock_guard lock{mutex}; |
| 105 | 122 | ||
| 123 | bool invalidated; | ||
| 106 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | 124 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); |
| 107 | buffer_offset = buffer_offset_base; | 125 | buffer_offset = buffer_offset_base; |
| 126 | |||
| 127 | return invalidated; | ||
| 108 | } | 128 | } |
| 109 | 129 | ||
| 110 | /// Finishes the upload stream, returns true on bindings invalidation. | 130 | /// Finishes the upload stream |
| 111 | bool Unmap() { | 131 | void Unmap() { |
| 112 | std::lock_guard lock{mutex}; | 132 | std::lock_guard lock{mutex}; |
| 113 | |||
| 114 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | 133 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); |
| 115 | return std::exchange(invalidated, false); | ||
| 116 | } | 134 | } |
| 117 | 135 | ||
| 136 | /// Function called at the end of each frame, intended for deferred operations | ||
| 118 | void TickFrame() { | 137 | void TickFrame() { |
| 119 | ++epoch; | 138 | ++epoch; |
| 139 | |||
| 120 | while (!pending_destruction.empty()) { | 140 | while (!pending_destruction.empty()) { |
| 121 | // Delay at least 4 frames before destruction. | 141 | // Delay at least 4 frames before destruction. |
| 122 | // This is due to triple buffering happening on some drivers. | 142 | // This is due to triple buffering happening on some drivers. |
| 123 | static constexpr u64 epochs_to_destroy = 5; | 143 | static constexpr u64 epochs_to_destroy = 5; |
| 124 | if (pending_destruction.front()->GetEpoch() + epochs_to_destroy > epoch) { | 144 | if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) { |
| 125 | break; | 145 | break; |
| 126 | } | 146 | } |
| 127 | pending_destruction.pop_front(); | 147 | pending_destruction.pop(); |
| 128 | } | 148 | } |
| 129 | } | 149 | } |
| 130 | 150 | ||
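TickFrame's deferred destruction deserves spelling out: a retired buffer is stamped with the current epoch and only freed once enough frames have passed that no in-flight driver work can still reference it. A hedged sketch of the pattern, with a hypothetical Resource type standing in for the cache's Buffer blocks:

#include <cstdint>
#include <memory>
#include <queue>

struct Resource {
    std::uint64_t epoch = 0; // frame counter value when the resource was retired
};

class DeferredDeleter {
public:
    void Retire(std::shared_ptr<Resource> resource) {
        resource->epoch = epoch;
        pending.push(std::move(resource));
    }

    void TickFrame() {
        ++epoch;
        // Keep retired resources alive for a few frames: with triple
        // buffering the driver may still be reading them.
        static constexpr std::uint64_t epochs_to_destroy = 5;
        while (!pending.empty() &&
               pending.front()->epoch + epochs_to_destroy <= epoch) {
            pending.pop(); // dropping the last shared_ptr frees the resource
        }
    }

private:
    std::queue<std::shared_ptr<Resource>> pending;
    std::uint64_t epoch = 0;
};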
| @@ -132,12 +152,11 @@ public: | |||
| 132 | void FlushRegion(VAddr addr, std::size_t size) { | 152 | void FlushRegion(VAddr addr, std::size_t size) { |
| 133 | std::lock_guard lock{mutex}; | 153 | std::lock_guard lock{mutex}; |
| 134 | 154 | ||
| 135 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | 155 | VectorMapInterval objects = GetMapsInRange(addr, size); |
| 136 | std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { | 156 | std::sort(objects.begin(), objects.end(), |
| 137 | return a->GetModificationTick() < b->GetModificationTick(); | 157 | [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; }); |
| 138 | }); | 158 | for (MapInterval* object : objects) { |
| 139 | for (auto& object : objects) { | 159 | if (object->is_modified && object->is_registered) { |
| 140 | if (object->IsModified() && object->IsRegistered()) { | ||
| 141 | mutex.unlock(); | 160 | mutex.unlock(); |
| 142 | FlushMap(object); | 161 | FlushMap(object); |
| 143 | mutex.lock(); | 162 | mutex.lock(); |
| @@ -148,9 +167,9 @@ public: | |||
| 148 | bool MustFlushRegion(VAddr addr, std::size_t size) { | 167 | bool MustFlushRegion(VAddr addr, std::size_t size) { |
| 149 | std::lock_guard lock{mutex}; | 168 | std::lock_guard lock{mutex}; |
| 150 | 169 | ||
| 151 | const std::vector<MapInterval> objects = GetMapsInRange(addr, size); | 170 | const VectorMapInterval objects = GetMapsInRange(addr, size); |
| 152 | return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { | 171 | return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) { |
| 153 | return map->IsModified() && map->IsRegistered(); | 172 | return map->is_modified && map->is_registered; |
| 154 | }); | 173 | }); |
| 155 | } | 174 | } |
| 156 | 175 | ||
| @@ -158,9 +177,8 @@ public: | |||
| 158 | void InvalidateRegion(VAddr addr, u64 size) { | 177 | void InvalidateRegion(VAddr addr, u64 size) { |
| 159 | std::lock_guard lock{mutex}; | 178 | std::lock_guard lock{mutex}; |
| 160 | 179 | ||
| 161 | std::vector<MapInterval> objects = GetMapsInRange(addr, size); | 180 | for (auto& object : GetMapsInRange(addr, size)) { |
| 162 | for (auto& object : objects) { | 181 | if (object->is_registered) { |
| 163 | if (object->IsRegistered()) { | ||
| 164 | Unregister(object); | 182 | Unregister(object); |
| 165 | } | 183 | } |
| 166 | } | 184 | } |
| @@ -169,10 +187,10 @@ public: | |||
| 169 | void OnCPUWrite(VAddr addr, std::size_t size) { | 187 | void OnCPUWrite(VAddr addr, std::size_t size) { |
| 170 | std::lock_guard lock{mutex}; | 188 | std::lock_guard lock{mutex}; |
| 171 | 189 | ||
| 172 | for (const auto& object : GetMapsInRange(addr, size)) { | 190 | for (MapInterval* object : GetMapsInRange(addr, size)) { |
| 173 | if (object->IsMemoryMarked() && object->IsRegistered()) { | 191 | if (object->is_memory_marked && object->is_registered) { |
| 174 | UnmarkMemory(object); | 192 | UnmarkMemory(object); |
| 175 | object->SetSyncPending(true); | 193 | object->is_sync_pending = true; |
| 176 | marked_for_unregister.emplace_back(object); | 194 | marked_for_unregister.emplace_back(object); |
| 177 | } | 195 | } |
| 178 | } | 196 | } |
| @@ -181,9 +199,9 @@ public: | |||
| 181 | void SyncGuestHost() { | 199 | void SyncGuestHost() { |
| 182 | std::lock_guard lock{mutex}; | 200 | std::lock_guard lock{mutex}; |
| 183 | 201 | ||
| 184 | for (const auto& object : marked_for_unregister) { | 202 | for (auto& object : marked_for_unregister) { |
| 185 | if (object->IsRegistered()) { | 203 | if (object->is_registered) { |
| 186 | object->SetSyncPending(false); | 204 | object->is_sync_pending = false; |
| 187 | Unregister(object); | 205 | Unregister(object); |
| 188 | } | 206 | } |
| 189 | } | 207 | } |
| @@ -192,9 +210,9 @@ public: | |||
| 192 | 210 | ||
| 193 | void CommitAsyncFlushes() { | 211 | void CommitAsyncFlushes() { |
| 194 | if (uncommitted_flushes) { | 212 | if (uncommitted_flushes) { |
| 195 | auto commit_list = std::make_shared<std::list<MapInterval>>(); | 213 | auto commit_list = std::make_shared<std::list<MapInterval*>>(); |
| 196 | for (auto& map : *uncommitted_flushes) { | 214 | for (MapInterval* map : *uncommitted_flushes) { |
| 197 | if (map->IsRegistered() && map->IsModified()) { | 215 | if (map->is_registered && map->is_modified) { |
| 198 | // TODO(Blinkhawk): Implement backend asynchronous flushing | 216 | // TODO(Blinkhawk): Implement backend asynchronous flushing |
| 199 | // AsyncFlushMap(map) | 217 | // AsyncFlushMap(map) |
| 200 | commit_list->push_back(map); | 218 | commit_list->push_back(map); |
| @@ -228,8 +246,8 @@ public: | |||
| 228 | committed_flushes.pop_front(); | 246 | committed_flushes.pop_front(); |
| 229 | return; | 247 | return; |
| 230 | } | 248 | } |
| 231 | for (MapInterval& map : *flush_list) { | 249 | for (MapInterval* map : *flush_list) { |
| 232 | if (map->IsRegistered()) { | 250 | if (map->is_registered) { |
| 233 | // TODO(Blinkhawk): Replace this with reading the asynchronous flush | 251 | // TODO(Blinkhawk): Replace this with reading the asynchronous flush |
| 234 | FlushMap(map); | 252 | FlushMap(map); |
| 235 | } | 253 | } |
| @@ -241,23 +259,21 @@ public: | |||
| 241 | 259 | ||
| 242 | protected: | 260 | protected: |
| 243 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 261 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 244 | std::unique_ptr<StreamBuffer> stream_buffer) | 262 | std::unique_ptr<StreamBuffer> stream_buffer_) |
| 245 | : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, | 263 | : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer_)}, |
| 246 | stream_buffer_handle{this->stream_buffer->GetHandle()} {} | 264 | stream_buffer_handle{stream_buffer->Handle()} {} |
| 247 | 265 | ||
| 248 | ~BufferCache() = default; | 266 | ~BufferCache() = default; |
| 249 | 267 | ||
| 250 | virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; | 268 | virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; |
| 251 | |||
| 252 | virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; | ||
| 253 | 269 | ||
| 254 | virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, | 270 | virtual void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 255 | const u8* data) = 0; | 271 | const u8* data) = 0; |
| 256 | 272 | ||
| 257 | virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, | 273 | virtual void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 258 | u8* data) = 0; | 274 | u8* data) = 0; |
| 259 | 275 | ||
| 260 | virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset, | 276 | virtual void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 261 | std::size_t dst_offset, std::size_t size) = 0; | 277 | std::size_t dst_offset, std::size_t size) = 0; |
| 262 | 278 | ||
| 263 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { | 279 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { |
| @@ -265,76 +281,74 @@ protected: | |||
| 265 | } | 281 | } |
| 266 | 282 | ||
| 267 | /// Register an object into the cache | 283 | /// Register an object into the cache |
| 268 | void Register(const MapInterval& new_map, bool inherit_written = false) { | 284 | MapInterval* Register(MapInterval new_map, bool inherit_written = false) { |
| 269 | const VAddr cpu_addr = new_map->GetStart(); | 285 | const VAddr cpu_addr = new_map.start; |
| 270 | if (!cpu_addr) { | 286 | if (!cpu_addr) { |
| 271 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", | 287 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", |
| 272 | new_map->GetGpuAddress()); | 288 | new_map.gpu_addr); |
| 273 | return; | 289 | return nullptr; |
| 274 | } | 290 | } |
| 275 | const std::size_t size = new_map->GetEnd() - new_map->GetStart(); | 291 | const std::size_t size = new_map.end - new_map.start; |
| 276 | new_map->MarkAsRegistered(true); | 292 | new_map.is_registered = true; |
| 277 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; | ||
| 278 | mapped_addresses.insert({interval, new_map}); | ||
| 279 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 293 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 280 | new_map->SetMemoryMarked(true); | 294 | new_map.is_memory_marked = true; |
| 281 | if (inherit_written) { | 295 | if (inherit_written) { |
| 282 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); | 296 | MarkRegionAsWritten(new_map.start, new_map.end - 1); |
| 283 | new_map->MarkAsWritten(true); | 297 | new_map.is_written = true; |
| 284 | } | 298 | } |
| 299 | MapInterval* const storage = mapped_addresses_allocator.Allocate(); | ||
| 300 | *storage = new_map; | ||
| 301 | mapped_addresses.insert(*storage); | ||
| 302 | return storage; | ||
| 285 | } | 303 | } |
| 286 | 304 | ||
| 287 | void UnmarkMemory(const MapInterval& map) { | 305 | void UnmarkMemory(MapInterval* map) { |
| 288 | if (!map->IsMemoryMarked()) { | 306 | if (!map->is_memory_marked) { |
| 289 | return; | 307 | return; |
| 290 | } | 308 | } |
| 291 | const std::size_t size = map->GetEnd() - map->GetStart(); | 309 | const std::size_t size = map->end - map->start; |
| 292 | rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); | 310 | rasterizer.UpdatePagesCachedCount(map->start, size, -1); |
| 293 | map->SetMemoryMarked(false); | 311 | map->is_memory_marked = false; |
| 294 | } | 312 | } |
| 295 | 313 | ||
| 296 | /// Unregisters an object from the cache | 314 | /// Unregisters an object from the cache |
| 297 | void Unregister(const MapInterval& map) { | 315 | void Unregister(MapInterval* map) { |
| 298 | UnmarkMemory(map); | 316 | UnmarkMemory(map); |
| 299 | map->MarkAsRegistered(false); | 317 | map->is_registered = false; |
| 300 | if (map->IsSyncPending()) { | 318 | if (map->is_sync_pending) { |
| 319 | map->is_sync_pending = false; | ||
| 301 | marked_for_unregister.remove(map); | 320 | marked_for_unregister.remove(map); |
| 302 | map->SetSyncPending(false); | ||
| 303 | } | 321 | } |
| 304 | if (map->IsWritten()) { | 322 | if (map->is_written) { |
| 305 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 323 | UnmarkRegionAsWritten(map->start, map->end - 1); |
| 306 | } | 324 | } |
| 307 | const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; | 325 | const auto it = mapped_addresses.find(*map); |
| 308 | mapped_addresses.erase(delete_interval); | 326 | ASSERT(it != mapped_addresses.end()); |
| 327 | mapped_addresses.erase(it); | ||
| 328 | mapped_addresses_allocator.Release(map); | ||
| 309 | } | 329 | } |
| 310 | 330 | ||
| 311 | private: | 331 | private: |
| 312 | MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { | 332 | MapInterval* MapAddress(const Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, |
| 313 | return std::make_shared<MapIntervalBase>(start, end, gpu_addr); | 333 | std::size_t size) { |
| 314 | } | 334 | const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); |
| 315 | |||
| 316 | MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, | ||
| 317 | const std::size_t size) { | ||
| 318 | std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); | ||
| 319 | if (overlaps.empty()) { | 335 | if (overlaps.empty()) { |
| 320 | auto& memory_manager = system.GPU().MemoryManager(); | 336 | auto& memory_manager = system.GPU().MemoryManager(); |
| 321 | const VAddr cpu_addr_end = cpu_addr + size; | 337 | const VAddr cpu_addr_end = cpu_addr + size; |
| 322 | MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); | ||
| 323 | if (memory_manager.IsGranularRange(gpu_addr, size)) { | 338 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 324 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); | 339 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); |
| 325 | UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); | 340 | UploadBlockData(*block, block->Offset(cpu_addr), size, host_ptr); |
| 326 | } else { | 341 | } else { |
| 327 | staging_buffer.resize(size); | 342 | staging_buffer.resize(size); |
| 328 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | 343 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); |
| 329 | UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); | 344 | UploadBlockData(*block, block->Offset(cpu_addr), size, staging_buffer.data()); |
| 330 | } | 345 | } |
| 331 | Register(new_map); | 346 | return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); |
| 332 | return new_map; | ||
| 333 | } | 347 | } |
| 334 | 348 | ||
| 335 | const VAddr cpu_addr_end = cpu_addr + size; | 349 | const VAddr cpu_addr_end = cpu_addr + size; |
| 336 | if (overlaps.size() == 1) { | 350 | if (overlaps.size() == 1) { |
| 337 | MapInterval& current_map = overlaps[0]; | 351 | MapInterval* const current_map = overlaps[0]; |
| 338 | if (current_map->IsInside(cpu_addr, cpu_addr_end)) { | 352 | if (current_map->IsInside(cpu_addr, cpu_addr_end)) { |
| 339 | return current_map; | 353 | return current_map; |
| 340 | } | 354 | } |
| @@ -344,60 +358,70 @@ private: | |||
| 344 | bool write_inheritance = false; | 358 | bool write_inheritance = false; |
| 345 | bool modified_inheritance = false; | 359 | bool modified_inheritance = false; |
| 346 | // Calculate new buffer parameters | 360 | // Calculate new buffer parameters |
| 347 | for (auto& overlap : overlaps) { | 361 | for (MapInterval* overlap : overlaps) { |
| 348 | new_start = std::min(overlap->GetStart(), new_start); | 362 | new_start = std::min(overlap->start, new_start); |
| 349 | new_end = std::max(overlap->GetEnd(), new_end); | 363 | new_end = std::max(overlap->end, new_end); |
| 350 | write_inheritance |= overlap->IsWritten(); | 364 | write_inheritance |= overlap->is_written; |
| 351 | modified_inheritance |= overlap->IsModified(); | 365 | modified_inheritance |= overlap->is_modified; |
| 352 | } | 366 | } |
| 353 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; | 367 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; |
| 354 | for (auto& overlap : overlaps) { | 368 | for (auto& overlap : overlaps) { |
| 355 | Unregister(overlap); | 369 | Unregister(overlap); |
| 356 | } | 370 | } |
| 357 | UpdateBlock(block, new_start, new_end, overlaps); | 371 | UpdateBlock(block, new_start, new_end, overlaps); |
| 358 | MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); | 372 | |
| 373 | const MapInterval new_map{new_start, new_end, new_gpu_addr}; | ||
| 374 | MapInterval* const map = Register(new_map, write_inheritance); | ||
| 375 | if (!map) { | ||
| 376 | return nullptr; | ||
| 377 | } | ||
| 359 | if (modified_inheritance) { | 378 | if (modified_inheritance) { |
| 360 | new_map->MarkAsModified(true, GetModifiedTicks()); | 379 | map->MarkAsModified(true, GetModifiedTicks()); |
| 361 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | 380 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { |
| 362 | MarkForAsyncFlush(new_map); | 381 | MarkForAsyncFlush(map); |
| 363 | } | 382 | } |
| 364 | } | 383 | } |
| 365 | Register(new_map, write_inheritance); | 384 | return map; |
| 366 | return new_map; | ||
| 367 | } | 385 | } |
| 368 | 386 | ||
| 369 | void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, | 387 | void UpdateBlock(const Buffer* block, VAddr start, VAddr end, |
| 370 | std::vector<MapInterval>& overlaps) { | 388 | const VectorMapInterval& overlaps) { |
| 371 | const IntervalType base_interval{start, end}; | 389 | const IntervalType base_interval{start, end}; |
| 372 | IntervalSet interval_set{}; | 390 | IntervalSet interval_set{}; |
| 373 | interval_set.add(base_interval); | 391 | interval_set.add(base_interval); |
| 374 | for (auto& overlap : overlaps) { | 392 | for (auto& overlap : overlaps) { |
| 375 | const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; | 393 | const IntervalType subtract{overlap->start, overlap->end}; |
| 376 | interval_set.subtract(subtract); | 394 | interval_set.subtract(subtract); |
| 377 | } | 395 | } |
| 378 | for (auto& interval : interval_set) { | 396 | for (auto& interval : interval_set) { |
| 379 | std::size_t size = interval.upper() - interval.lower(); | 397 | const std::size_t size = interval.upper() - interval.lower(); |
| 380 | if (size > 0) { | 398 | if (size == 0) { |
| 381 | staging_buffer.resize(size); | 399 | continue; |
| 382 | system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); | ||
| 383 | UploadBlockData(block, block->GetOffset(interval.lower()), size, | ||
| 384 | staging_buffer.data()); | ||
| 385 | } | 400 | } |
| 401 | staging_buffer.resize(size); | ||
| 402 | system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); | ||
| 403 | UploadBlockData(*block, block->Offset(interval.lower()), size, staging_buffer.data()); | ||
| 386 | } | 404 | } |
| 387 | } | 405 | } |
| 388 | 406 | ||
| 389 | std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { | 407 | VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) { |
| 408 | VectorMapInterval result; | ||
| 390 | if (size == 0) { | 409 | if (size == 0) { |
| 391 | return {}; | 410 | return result; |
| 392 | } | 411 | } |
| 393 | 412 | ||
| 394 | std::vector<MapInterval> objects{}; | 413 | const VAddr addr_end = addr + size; |
| 395 | const IntervalType interval{addr, addr + size}; | 414 | auto it = mapped_addresses.lower_bound(addr); |
| 396 | for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { | 415 | if (it != mapped_addresses.begin()) { |
| 397 | objects.push_back(pair.second); | 416 | --it; |
| 398 | } | 417 | } |
| 399 | 418 | while (it != mapped_addresses.end() && it->start < addr_end) { | |
| 400 | return objects; | 419 | if (it->Overlaps(addr, addr_end)) { |
| 420 | result.push_back(&*it); | ||
| 421 | } | ||
| 422 | ++it; | ||
| 423 | } | ||
| 424 | return result; | ||
| 401 | } | 425 | } |
| 402 | 426 | ||
| 403 | /// Returns a ticks counter used for tracking when cached objects were last modified | 427 | /// Returns a ticks counter used for tracking when cached objects were last modified |
| @@ -405,20 +429,24 @@ private: | |||
| 405 | return ++modified_ticks; | 429 | return ++modified_ticks; |
| 406 | } | 430 | } |
| 407 | 431 | ||
| 408 | void FlushMap(MapInterval map) { | 432 | void FlushMap(MapInterval* map) { |
| 409 | std::size_t size = map->GetEnd() - map->GetStart(); | 433 | const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS); |
| 410 | OwnerBuffer block = blocks[map->GetStart() >> block_page_bits]; | 434 | ASSERT_OR_EXECUTE(it != blocks.end(), return;); |
| 435 | |||
| 436 | std::shared_ptr<Buffer> block = it->second; | ||
| 437 | |||
| 438 | const std::size_t size = map->end - map->start; | ||
| 411 | staging_buffer.resize(size); | 439 | staging_buffer.resize(size); |
| 412 | DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); | 440 | DownloadBlockData(*block, block->Offset(map->start), size, staging_buffer.data()); |
| 413 | system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); | 441 | system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); |
| 414 | map->MarkAsModified(false, 0); | 442 | map->MarkAsModified(false, 0); |
| 415 | } | 443 | } |
| 416 | 444 | ||
| 417 | BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, | 445 | template <typename Callable> |
| 418 | std::size_t alignment) { | 446 | BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { |
| 419 | AlignBuffer(alignment); | 447 | AlignBuffer(alignment); |
| 420 | const std::size_t uploaded_offset = buffer_offset; | 448 | const std::size_t uploaded_offset = buffer_offset; |
| 421 | std::memcpy(buffer_ptr, raw_pointer, size); | 449 | callable(buffer_ptr); |
| 422 | 450 | ||
| 423 | buffer_ptr += size; | 451 | buffer_ptr += size; |
| 424 | buffer_offset += size; | 452 | buffer_offset += size; |
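The (pointer, size) overload of StreamBufferUpload is replaced by a callable that writes directly into the mapped stream buffer, so granular sources memcpy straight in while scattered sources are read from guest memory without an extra staging round trip. A simplified sketch of the shape, with hypothetical names; the real function also aligns buffer_offset first, which is omitted here:

#include <cstddef>
#include <cstdint>

// Hypothetical, simplified version of the callable-based upload. The caller
// supplies the write step; the helper only manages cursor bookkeeping.
template <typename Callable>
std::uint64_t StreamUpload(std::uint8_t*& buffer_ptr, std::uint64_t& buffer_offset,
                           std::size_t size, Callable&& write) {
    const std::uint64_t uploaded_offset = buffer_offset;
    write(buffer_ptr); // e.g. [src, size](u8* dest) { std::memcpy(dest, src, size); }
    buffer_ptr += size;
    buffer_offset += size;
    return uploaded_offset;
}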
| @@ -432,97 +460,89 @@ private: | |||
| 432 | buffer_offset = offset_aligned; | 460 | buffer_offset = offset_aligned; |
| 433 | } | 461 | } |
| 434 | 462 | ||
| 435 | OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { | 463 | std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) { |
| 436 | const std::size_t old_size = buffer->GetSize(); | 464 | const std::size_t old_size = buffer->Size(); |
| 437 | const std::size_t new_size = old_size + block_page_size; | 465 | const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; |
| 438 | const VAddr cpu_addr = buffer->GetCpuAddr(); | 466 | const VAddr cpu_addr = buffer->CpuAddr(); |
| 439 | OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); | 467 | std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); |
| 440 | CopyBlock(buffer, new_buffer, 0, 0, old_size); | 468 | CopyBlock(*buffer, *new_buffer, 0, 0, old_size); |
| 441 | buffer->SetEpoch(epoch); | 469 | QueueDestruction(std::move(buffer)); |
| 442 | pending_destruction.push_back(buffer); | 470 | |
| 443 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; | 471 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; |
| 444 | u64 page_start = cpu_addr >> block_page_bits; | 472 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; |
| 445 | const u64 page_end = cpu_addr_end >> block_page_bits; | 473 | for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { |
| 446 | while (page_start <= page_end) { | 474 | blocks.insert_or_assign(page_start, new_buffer); |
| 447 | blocks[page_start] = new_buffer; | ||
| 448 | ++page_start; | ||
| 449 | } | 475 | } |
| 476 | |||
| 450 | return new_buffer; | 477 | return new_buffer; |
| 451 | } | 478 | } |
| 452 | 479 | ||
| 453 | OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { | 480 | std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first, |
| 454 | const std::size_t size_1 = first->GetSize(); | 481 | std::shared_ptr<Buffer> second) { |
| 455 | const std::size_t size_2 = second->GetSize(); | 482 | const std::size_t size_1 = first->Size(); |
| 456 | const VAddr first_addr = first->GetCpuAddr(); | 483 | const std::size_t size_2 = second->Size(); |
| 457 | const VAddr second_addr = second->GetCpuAddr(); | 484 | const VAddr first_addr = first->CpuAddr(); |
| 485 | const VAddr second_addr = second->CpuAddr(); | ||
| 458 | const VAddr new_addr = std::min(first_addr, second_addr); | 486 | const VAddr new_addr = std::min(first_addr, second_addr); |
| 459 | const std::size_t new_size = size_1 + size_2; | 487 | const std::size_t new_size = size_1 + size_2; |
| 460 | OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); | 488 | |
| 461 | CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); | 489 | std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); |
| 462 | CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); | 490 | CopyBlock(*first, *new_buffer, 0, new_buffer->Offset(first_addr), size_1); |
| 463 | first->SetEpoch(epoch); | 491 | CopyBlock(*second, *new_buffer, 0, new_buffer->Offset(second_addr), size_2); |
| 464 | second->SetEpoch(epoch); | 492 | QueueDestruction(std::move(first)); |
| 465 | pending_destruction.push_back(first); | 493 | QueueDestruction(std::move(second)); |
| 466 | pending_destruction.push_back(second); | 494 | |
| 467 | const VAddr cpu_addr_end = new_addr + new_size - 1; | 495 | const VAddr cpu_addr_end = new_addr + new_size - 1; |
| 468 | u64 page_start = new_addr >> block_page_bits; | 496 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; |
| 469 | const u64 page_end = cpu_addr_end >> block_page_bits; | 497 | for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { |
| 470 | while (page_start <= page_end) { | 498 | blocks.insert_or_assign(page_start, new_buffer); |
| 471 | blocks[page_start] = new_buffer; | ||
| 472 | ++page_start; | ||
| 473 | } | 499 | } |
| 474 | return new_buffer; | 500 | return new_buffer; |
| 475 | } | 501 | } |
| 476 | 502 | ||
| 477 | OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { | 503 | Buffer* GetBlock(VAddr cpu_addr, std::size_t size) { |
| 478 | OwnerBuffer found; | 504 | std::shared_ptr<Buffer> found; |
| 505 | |||
| 479 | const VAddr cpu_addr_end = cpu_addr + size - 1; | 506 | const VAddr cpu_addr_end = cpu_addr + size - 1; |
| 480 | u64 page_start = cpu_addr >> block_page_bits; | 507 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; |
| 481 | const u64 page_end = cpu_addr_end >> block_page_bits; | 508 | for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { |
| 482 | while (page_start <= page_end) { | ||
| 483 | auto it = blocks.find(page_start); | 509 | auto it = blocks.find(page_start); |
| 484 | if (it == blocks.end()) { | 510 | if (it == blocks.end()) { |
| 485 | if (found) { | 511 | if (found) { |
| 486 | found = EnlargeBlock(found); | 512 | found = EnlargeBlock(found); |
| 487 | } else { | 513 | continue; |
| 488 | const VAddr start_addr = (page_start << block_page_bits); | ||
| 489 | found = CreateBlock(start_addr, block_page_size); | ||
| 490 | blocks[page_start] = found; | ||
| 491 | } | ||
| 492 | } else { | ||
| 493 | if (found) { | ||
| 494 | if (found == it->second) { | ||
| 495 | ++page_start; | ||
| 496 | continue; | ||
| 497 | } | ||
| 498 | found = MergeBlocks(found, it->second); | ||
| 499 | } else { | ||
| 500 | found = it->second; | ||
| 501 | } | 514 | } |
| 515 | const VAddr start_addr = page_start << BLOCK_PAGE_BITS; | ||
| 516 | found = CreateBlock(start_addr, BLOCK_PAGE_SIZE); | ||
| 517 | blocks.insert_or_assign(page_start, found); | ||
| 518 | continue; | ||
| 519 | } | ||
| 520 | if (!found) { | ||
| 521 | found = it->second; | ||
| 522 | continue; | ||
| 523 | } | ||
| 524 | if (found != it->second) { | ||
| 525 | found = MergeBlocks(std::move(found), it->second); | ||
| 502 | } | 526 | } |
| 503 | ++page_start; | ||
| 504 | } | 527 | } |
| 505 | return found; | 528 | return found.get(); |
| 506 | } | 529 | } |
| 507 | 530 | ||
| 508 | void MarkRegionAsWritten(const VAddr start, const VAddr end) { | 531 | void MarkRegionAsWritten(VAddr start, VAddr end) { |
| 509 | u64 page_start = start >> write_page_bit; | 532 | const u64 page_end = end >> WRITE_PAGE_BIT; |
| 510 | const u64 page_end = end >> write_page_bit; | 533 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { |
| 511 | while (page_start <= page_end) { | ||
| 512 | auto it = written_pages.find(page_start); | 534 | auto it = written_pages.find(page_start); |
| 513 | if (it != written_pages.end()) { | 535 | if (it != written_pages.end()) { |
| 514 | it->second = it->second + 1; | 536 | it->second = it->second + 1; |
| 515 | } else { | 537 | } else { |
| 516 | written_pages[page_start] = 1; | 538 | written_pages.insert_or_assign(page_start, 1); |
| 517 | } | 539 | } |
| 518 | page_start++; | ||
| 519 | } | 540 | } |
| 520 | } | 541 | } |
| 521 | 542 | ||
| 522 | void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { | 543 | void UnmarkRegionAsWritten(VAddr start, VAddr end) { |
| 523 | u64 page_start = start >> write_page_bit; | 544 | const u64 page_end = end >> WRITE_PAGE_BIT; |
| 524 | const u64 page_end = end >> write_page_bit; | 545 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { |
| 525 | while (page_start <= page_end) { | ||
| 526 | auto it = written_pages.find(page_start); | 546 | auto it = written_pages.find(page_start); |
| 527 | if (it != written_pages.end()) { | 547 | if (it != written_pages.end()) { |
| 528 | if (it->second > 1) { | 548 | if (it->second > 1) { |
| @@ -531,25 +551,27 @@ private: | |||
| 531 | written_pages.erase(it); | 551 | written_pages.erase(it); |
| 532 | } | 552 | } |
| 533 | } | 553 | } |
| 534 | page_start++; | ||
| 535 | } | 554 | } |
| 536 | } | 555 | } |
| 537 | 556 | ||
| 538 | bool IsRegionWritten(const VAddr start, const VAddr end) const { | 557 | bool IsRegionWritten(VAddr start, VAddr end) const { |
| 539 | u64 page_start = start >> write_page_bit; | 558 | const u64 page_end = end >> WRITE_PAGE_BIT; |
| 540 | const u64 page_end = end >> write_page_bit; | 559 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { |
| 541 | while (page_start <= page_end) { | ||
| 542 | if (written_pages.count(page_start) > 0) { | 560 | if (written_pages.count(page_start) > 0) { |
| 543 | return true; | 561 | return true; |
| 544 | } | 562 | } |
| 545 | page_start++; | ||
| 546 | } | 563 | } |
| 547 | return false; | 564 | return false; |
| 548 | } | 565 | } |
| 549 | 566 | ||
| 550 | void MarkForAsyncFlush(MapInterval& map) { | 567 | void QueueDestruction(std::shared_ptr<Buffer> buffer) { |
| 568 | buffer->SetEpoch(epoch); | ||
| 569 | pending_destruction.push(std::move(buffer)); | ||
| 570 | } | ||
| 571 | |||
| 572 | void MarkForAsyncFlush(MapInterval* map) { | ||
| 551 | if (!uncommitted_flushes) { | 573 | if (!uncommitted_flushes) { |
| 552 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>(); | 574 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); |
| 553 | } | 575 | } |
| 554 | uncommitted_flushes->insert(map); | 576 | uncommitted_flushes->insert(map); |
| 555 | } | 577 | } |
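MarkRegionAsWritten and its companions implement a per-page reference count at 2 KiB granularity (WRITE_PAGE_BIT == 11): marking increments the count, unmarking decrements and erases at zero, and IsRegionWritten is a plain lookup. A condensed sketch of the same bookkeeping, assuming identical semantics:

#include <cstdint>
#include <unordered_map>

class WrittenPages {
public:
    static constexpr std::uint64_t PAGE_BIT = 11; // 2 KiB pages

    void Mark(std::uint64_t start, std::uint64_t end) {
        for (std::uint64_t page = start >> PAGE_BIT; page <= (end >> PAGE_BIT); ++page) {
            ++pages[page]; // operator[] default-constructs the count to 0
        }
    }

    void Unmark(std::uint64_t start, std::uint64_t end) {
        for (std::uint64_t page = start >> PAGE_BIT; page <= (end >> PAGE_BIT); ++page) {
            const auto it = pages.find(page);
            if (it != pages.end() && --it->second == 0) {
                pages.erase(it);
            }
        }
    }

    bool IsWritten(std::uint64_t start, std::uint64_t end) const {
        for (std::uint64_t page = start >> PAGE_BIT; page <= (end >> PAGE_BIT); ++page) {
            if (pages.count(page) > 0) {
                return true;
            }
        }
        return false;
    }

private:
    std::unordered_map<std::uint64_t, std::uint32_t> pages;
};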
| @@ -558,35 +580,29 @@ private: | |||
| 558 | Core::System& system; | 580 | Core::System& system; |
| 559 | 581 | ||
| 560 | std::unique_ptr<StreamBuffer> stream_buffer; | 582 | std::unique_ptr<StreamBuffer> stream_buffer; |
| 561 | BufferType stream_buffer_handle{}; | 583 | BufferType stream_buffer_handle; |
| 562 | |||
| 563 | bool invalidated = false; | ||
| 564 | 584 | ||
| 565 | u8* buffer_ptr = nullptr; | 585 | u8* buffer_ptr = nullptr; |
| 566 | u64 buffer_offset = 0; | 586 | u64 buffer_offset = 0; |
| 567 | u64 buffer_offset_base = 0; | 587 | u64 buffer_offset_base = 0; |
| 568 | 588 | ||
| 569 | using IntervalSet = boost::icl::interval_set<VAddr>; | 589 | MapIntervalAllocator mapped_addresses_allocator; |
| 570 | using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; | 590 | boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> |
| 571 | using IntervalType = typename IntervalCache::interval_type; | 591 | mapped_addresses; |
| 572 | IntervalCache mapped_addresses; | ||
| 573 | 592 | ||
| 574 | static constexpr u64 write_page_bit = 11; | ||
| 575 | std::unordered_map<u64, u32> written_pages; | 593 | std::unordered_map<u64, u32> written_pages; |
| 594 | std::unordered_map<u64, std::shared_ptr<Buffer>> blocks; | ||
| 576 | 595 | ||
| 577 | static constexpr u64 block_page_bits = 21; | 596 | std::queue<std::shared_ptr<Buffer>> pending_destruction; |
| 578 | static constexpr u64 block_page_size = 1ULL << block_page_bits; | ||
| 579 | std::unordered_map<u64, OwnerBuffer> blocks; | ||
| 580 | |||
| 581 | std::list<OwnerBuffer> pending_destruction; | ||
| 582 | u64 epoch = 0; | 597 | u64 epoch = 0; |
| 583 | u64 modified_ticks = 0; | 598 | u64 modified_ticks = 0; |
| 584 | 599 | ||
| 585 | std::vector<u8> staging_buffer; | 600 | std::vector<u8> staging_buffer; |
| 586 | std::list<MapInterval> marked_for_unregister; | ||
| 587 | 601 | ||
| 588 | std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{}; | 602 | std::list<MapInterval*> marked_for_unregister; |
| 589 | std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes; | 603 | |
| 604 | std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; | ||
| 605 | std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes; | ||
| 590 | 606 | ||
| 591 | std::recursive_mutex mutex; | 607 | std::recursive_mutex mutex; |
| 592 | }; | 608 | }; |
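With the boost::icl interval_map gone, GetMapsInRange becomes a hand-rolled range query over a set ordered by start address: lower_bound, one step back to catch an interval straddling addr, then a forward walk while start < addr_end. A standalone sketch over std::set (an assumption for the sake of a compilable example; the cache uses boost::intrusive::set, but the walk is the same):

#include <cstdint>
#include <set>
#include <vector>

using VAddr = std::uint64_t;

struct Interval {
    VAddr start;
    VAddr end; // exclusive

    bool Overlaps(VAddr other_start, VAddr other_end) const {
        return start < other_end && other_start < end;
    }
    bool operator<(const Interval& rhs) const { return start < rhs.start; }
};

std::vector<const Interval*> MapsInRange(const std::set<Interval>& set,
                                         VAddr addr, std::size_t size) {
    std::vector<const Interval*> result;
    if (size == 0) {
        return result;
    }
    const VAddr addr_end = addr + size;
    auto it = set.lower_bound(Interval{addr, 0});
    if (it != set.begin()) {
        --it; // the previous interval may straddle addr
    }
    while (it != set.end() && it->start < addr_end) {
        if (it->Overlaps(addr, addr_end)) {
            result.push_back(&*it);
        }
        ++it;
    }
    return result;
}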
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp new file mode 100644 index 000000000..62587e18a --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.cpp | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | ||
| 9 | |||
| 10 | #include "video_core/buffer_cache/map_interval.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | MapIntervalAllocator::MapIntervalAllocator() { | ||
| 15 | FillFreeList(first_chunk); | ||
| 16 | } | ||
| 17 | |||
| 18 | MapIntervalAllocator::~MapIntervalAllocator() = default; | ||
| 19 | |||
| 20 | void MapIntervalAllocator::AllocateNewChunk() { | ||
| 21 | *new_chunk = std::make_unique<Chunk>(); | ||
| 22 | FillFreeList(**new_chunk); | ||
| 23 | new_chunk = &(*new_chunk)->next; | ||
| 24 | } | ||
| 25 | |||
| 26 | void MapIntervalAllocator::FillFreeList(Chunk& chunk) { | ||
| 27 | const std::size_t old_size = free_list.size(); | ||
| 28 | free_list.resize(old_size + chunk.data.size()); | ||
| 29 | std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size, | ||
| 30 | [](MapInterval& interval) { return &interval; }); | ||
| 31 | } | ||
| 32 | |||
| 33 | } // namespace VideoCommon | ||
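One subtlety in FillFreeList: the chunk is traversed in reverse so that Allocate, which pops from the back of free_list, hands out slots in forward order within a chunk. A tiny reproduction of that inversion (the chunk shrunk to four hypothetical int slots):

#include <algorithm>
#include <array>
#include <cassert>
#include <vector>

int main() {
    std::array<int, 4> chunk{};
    std::vector<int*> free_list;

    // Same reverse transform FillFreeList performs.
    free_list.resize(chunk.size());
    std::transform(chunk.rbegin(), chunk.rend(), free_list.begin(),
                   [](int& v) { return &v; });

    // Popping from the back yields the chunk's elements front to back.
    assert(free_list.back() == &chunk[0]);
    free_list.pop_back();
    assert(free_list.back() == &chunk[1]);
}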
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 29d8b26f3..fe0bcd1d8 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h | |||
| @@ -4,104 +4,89 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <memory> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <boost/intrusive/set_hook.hpp> | ||
| 13 | |||
| 7 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 8 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 9 | 16 | ||
| 10 | namespace VideoCommon { | 17 | namespace VideoCommon { |
| 11 | 18 | ||
| 12 | class MapIntervalBase { | 19 | struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> { |
| 13 | public: | 20 | MapInterval() = default; |
| 14 | MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) | ||
| 15 | : start{start}, end{end}, gpu_addr{gpu_addr} {} | ||
| 16 | |||
| 17 | void SetCpuAddress(VAddr new_cpu_addr) { | ||
| 18 | cpu_addr = new_cpu_addr; | ||
| 19 | } | ||
| 20 | |||
| 21 | VAddr GetCpuAddress() const { | ||
| 22 | return cpu_addr; | ||
| 23 | } | ||
| 24 | |||
| 25 | GPUVAddr GetGpuAddress() const { | ||
| 26 | return gpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | bool IsInside(const VAddr other_start, const VAddr other_end) const { | ||
| 30 | return (start <= other_start && other_end <= end); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator==(const MapIntervalBase& rhs) const { | ||
| 34 | return std::tie(start, end) == std::tie(rhs.start, rhs.end); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const MapIntervalBase& rhs) const { | ||
| 38 | return !operator==(rhs); | ||
| 39 | } | ||
| 40 | 21 | ||
| 41 | void MarkAsRegistered(const bool registered) { | 22 | /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {} |
| 42 | is_registered = registered; | ||
| 43 | } | ||
| 44 | 23 | ||
| 45 | bool IsRegistered() const { | 24 | explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept |
| 46 | return is_registered; | 25 | : start{start_}, end{end_}, gpu_addr{gpu_addr_} {} |
| 47 | } | ||
| 48 | 26 | ||
| 49 | void SetMemoryMarked(bool is_memory_marked_) { | 27 | bool IsInside(VAddr other_start, VAddr other_end) const noexcept { |
| 50 | is_memory_marked = is_memory_marked_; | 28 | return start <= other_start && other_end <= end; |
| 51 | } | 29 | } |
| 52 | 30 | ||
| 53 | bool IsMemoryMarked() const { | 31 | bool Overlaps(VAddr other_start, VAddr other_end) const noexcept { |
| 54 | return is_memory_marked; | 32 | return start < other_end && other_start < end; |
| 55 | } | 33 | } |
| 56 | 34 | ||
| 57 | void SetSyncPending(bool is_sync_pending_) { | 35 | void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { |
| 58 | is_sync_pending = is_sync_pending_; | 36 | is_modified = is_modified_; |
| 59 | } | 37 | ticks = ticks_; |
| 38 | } | ||
| 39 | |||
| 40 | boost::intrusive::set_member_hook<> member_hook_; | ||
| 41 | VAddr start = 0; | ||
| 42 | VAddr end = 0; | ||
| 43 | GPUVAddr gpu_addr = 0; | ||
| 44 | u64 ticks = 0; | ||
| 45 | bool is_written = false; | ||
| 46 | bool is_modified = false; | ||
| 47 | bool is_registered = false; | ||
| 48 | bool is_memory_marked = false; | ||
| 49 | bool is_sync_pending = false; | ||
| 50 | }; | ||
| 60 | 51 | ||
| 61 | bool IsSyncPending() const { | 52 | struct MapIntervalCompare { |
| 62 | return is_sync_pending; | 53 | constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept { |
| 54 | return lhs.start < rhs.start; | ||
| 63 | } | 55 | } |
| 56 | }; | ||
| 64 | 57 | ||
| 65 | VAddr GetStart() const { | 58 | class MapIntervalAllocator { |
| 66 | return start; | 59 | public: |
| 67 | } | 60 | MapIntervalAllocator(); |
| 61 | ~MapIntervalAllocator(); | ||
| 68 | 62 | ||
| 69 | VAddr GetEnd() const { | 63 | MapInterval* Allocate() { |
| 70 | return end; | 64 | if (free_list.empty()) { |
| 65 | AllocateNewChunk(); | ||
| 66 | } | ||
| 67 | MapInterval* const interval = free_list.back(); | ||
| 68 | free_list.pop_back(); | ||
| 69 | return interval; | ||
| 71 | } | 70 | } |
| 72 | 71 | ||
| 73 | void MarkAsModified(const bool is_modified_, const u64 tick) { | 72 | void Release(MapInterval* interval) { |
| 74 | is_modified = is_modified_; | 73 | free_list.push_back(interval); |
| 75 | ticks = tick; | ||
| 76 | } | 74 | } |
| 77 | 75 | ||
| 78 | bool IsModified() const { | 76 | private: |
| 79 | return is_modified; | 77 | struct Chunk { |
| 80 | } | 78 | std::unique_ptr<Chunk> next; |
| 79 | std::array<MapInterval, 0x8000> data; | ||
| 80 | }; | ||
| 81 | 81 | ||
| 82 | u64 GetModificationTick() const { | 82 | void AllocateNewChunk(); |
| 83 | return ticks; | ||
| 84 | } | ||
| 85 | 83 | ||
| 86 | void MarkAsWritten(const bool is_written_) { | 84 | void FillFreeList(Chunk& chunk); |
| 87 | is_written = is_written_; | ||
| 88 | } | ||
| 89 | 85 | ||
| 90 | bool IsWritten() const { | 86 | std::vector<MapInterval*> free_list; |
| 91 | return is_written; | 87 | std::unique_ptr<Chunk>* new_chunk = &first_chunk.next; |
| 92 | } | ||
| 93 | 88 | ||
| 94 | private: | 89 | Chunk first_chunk; |
| 95 | VAddr start; | ||
| 96 | VAddr end; | ||
| 97 | GPUVAddr gpu_addr; | ||
| 98 | VAddr cpu_addr{}; | ||
| 99 | bool is_written{}; | ||
| 100 | bool is_modified{}; | ||
| 101 | bool is_registered{}; | ||
| 102 | bool is_memory_marked{}; | ||
| 103 | bool is_sync_pending{}; | ||
| 104 | u64 ticks{}; | ||
| 105 | }; | 90 | }; |
| 106 | 91 | ||
| 107 | } // namespace VideoCommon | 92 | } // namespace VideoCommon |
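How the pieces fit together, sketched under the assumption that the header above is available: nodes come from MapIntervalAllocator, are linked into the intrusive set by reference, and must be unlinked before Release recycles them, mirroring Register and Unregister in the cache. The addresses below are arbitrary example values:

#include <boost/intrusive/set.hpp>

#include "video_core/buffer_cache/map_interval.h"

int main() {
    using namespace VideoCommon;

    boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> maps;
    MapIntervalAllocator allocator;

    // Allocate a node from the pool and fill it in.
    MapInterval* const node = allocator.Allocate();
    *node = MapInterval(0x1000, 0x2000, 0x8000);
    maps.insert(*node); // intrusive: the node itself is linked, nothing is copied

    // Unlink before recycling, as Unregister does.
    maps.erase(maps.iterator_to(*node));
    allocator.Release(node);
}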
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index ebe139504..f46e81bb7 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h | |||
| @@ -93,6 +93,7 @@ public: | |||
| 93 | virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; | 93 | virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; |
| 94 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | 94 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |
| 95 | u64 offset) const = 0; | 95 | u64 offset) const = 0; |
| 96 | virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; | ||
| 96 | virtual u32 GetBoundBuffer() const = 0; | 97 | virtual u32 GetBoundBuffer() const = 0; |
| 97 | 98 | ||
| 98 | virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; | 99 | virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; |
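The new pure virtual lets callers that already hold a raw 32-bit texture handle build a descriptor without going through a const buffer read; the bindless variants in both engines shrink to a read-then-delegate. A minimal sketch of the split, with hypothetical stand-in types rather than the real Tegra interfaces:

#include <cstdint>

struct SamplerDescriptor {}; // stand-in for the real descriptor state

// Hypothetical engine showing the refactored call shape: bindless access
// reads the handle from guest memory and forwards to the shared handle path.
class Engine {
public:
    SamplerDescriptor AccessBindlessSampler(std::uint64_t const_buffer,
                                            std::uint64_t offset) const {
        const std::uint32_t handle = ReadHandleFromConstBuffer(const_buffer, offset);
        return AccessSampler(handle);
    }

    SamplerDescriptor AccessSampler(std::uint32_t /*handle*/) const {
        // The real engines build the descriptor from the TIC/TSC tables here.
        return SamplerDescriptor{};
    }

private:
    std::uint32_t ReadHandleFromConstBuffer(std::uint64_t, std::uint64_t) const {
        return 0; // placeholder; real code reads guest memory
    }
};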
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index f6237fc6a..a82b06a38 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | |||
| 92 | ASSERT(stage == ShaderType::Compute); | 92 | ASSERT(stage == ShaderType::Compute); |
| 93 | const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; | 93 | const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; |
| 94 | const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; | 94 | const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; |
| 95 | return AccessSampler(memory_manager.Read<u32>(tex_info_address)); | ||
| 96 | } | ||
| 95 | 97 | ||
| 96 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | 98 | SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { |
| 99 | const Texture::TextureHandle tex_handle{handle}; | ||
| 97 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 100 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); |
| 98 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); | 101 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); |
| 99 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 102 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 18ceedfaf..b7f668d88 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -219,6 +219,8 @@ public: | |||
| 219 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | 219 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |
| 220 | u64 offset) const override; | 220 | u64 offset) const override; |
| 221 | 221 | ||
| 222 | SamplerDescriptor AccessSampler(u32 handle) const override; | ||
| 223 | |||
| 222 | u32 GetBoundBuffer() const override { | 224 | u32 GetBoundBuffer() const override { |
| 223 | return regs.tex_cb_index; | 225 | return regs.tex_cb_index; |
| 224 | } | 226 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 024c9e43b..ea3c8a963 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -25,9 +25,8 @@ constexpr u32 MacroRegistersStart = 0xE00; | |||
| 25 | Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 25 | Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 26 | MemoryManager& memory_manager) | 26 | MemoryManager& memory_manager) |
| 27 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, | 27 | : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, |
| 28 | macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { | 28 | macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} { |
| 29 | dirty.flags.flip(); | 29 | dirty.flags.flip(); |
| 30 | |||
| 31 | InitializeRegisterDefaults(); | 30 | InitializeRegisterDefaults(); |
| 32 | } | 31 | } |
| 33 | 32 | ||
| @@ -106,7 +105,11 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 106 | regs.rasterize_enable = 1; | 105 | regs.rasterize_enable = 1; |
| 107 | regs.rt_separate_frag_data = 1; | 106 | regs.rt_separate_frag_data = 1; |
| 108 | regs.framebuffer_srgb = 1; | 107 | regs.framebuffer_srgb = 1; |
| 108 | regs.line_width_aliased = 1.0f; | ||
| 109 | regs.line_width_smooth = 1.0f; | ||
| 109 | regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; | 110 | regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; |
| 111 | regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill; | ||
| 112 | regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill; | ||
| 110 | 113 | ||
| 111 | shadow_state = regs; | 114 | shadow_state = regs; |
| 112 | 115 | ||
| @@ -116,7 +119,7 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 116 | mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; | 119 | mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; |
| 117 | } | 120 | } |
| 118 | 121 | ||
| 119 | void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { | 122 | void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) { |
| 120 | // Reset the current macro. | 123 | // Reset the current macro. |
| 121 | executing_macro = 0; | 124 | executing_macro = 0; |
| 122 | 125 | ||
| @@ -125,7 +128,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 | |||
| 125 | ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); | 128 | ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size()); |
| 126 | 129 | ||
| 127 | // Execute the current macro. | 130 | // Execute the current macro. |
| 128 | macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters); | 131 | macro_engine->Execute(macro_positions[entry], parameters); |
| 129 | if (mme_draw.current_mode != MMEDrawMode::Undefined) { | 132 | if (mme_draw.current_mode != MMEDrawMode::Undefined) { |
| 130 | FlushMMEInlineDraw(); | 133 | FlushMMEInlineDraw(); |
| 131 | } | 134 | } |
| @@ -161,7 +164,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | |||
| 161 | 164 | ||
| 162 | // Call the macro when there are no more parameters in the command buffer | 165 | // Call the macro when there are no more parameters in the command buffer |
| 163 | if (is_last_call) { | 166 | if (is_last_call) { |
| 164 | CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); | 167 | CallMacroMethod(executing_macro, macro_params); |
| 165 | macro_params.clear(); | 168 | macro_params.clear(); |
| 166 | } | 169 | } |
| 167 | return; | 170 | return; |
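CallMacroMethod now receives the accumulated parameters as a vector rather than a (count, pointer) pair, matching the new MacroEngine interface. The buffering around it, sketched with hypothetical names; the real code also derives the macro entry from the method number and flushes pending MME draws after execution:

#include <cstdint>
#include <vector>

class MacroDispatcher {
public:
    void QueueParameter(std::uint32_t method, std::uint32_t argument, bool is_last_call) {
        if (executing_macro == 0) {
            executing_macro = method; // the first write latches the macro id
        }
        macro_params.push_back(argument);
        if (is_last_call) {
            Execute(executing_macro, macro_params); // one vector, no (size, ptr) pair
            macro_params.clear();
            executing_macro = 0;
        }
    }

private:
    void Execute(std::uint32_t /*method*/, const std::vector<std::uint32_t>& /*parameters*/) {
        // macro_engine->Execute(position, parameters) in Maxwell3D
    }

    std::uint32_t executing_macro = 0;
    std::vector<std::uint32_t> macro_params;
};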
| @@ -197,7 +200,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | |||
| 197 | break; | 200 | break; |
| 198 | } | 201 | } |
| 199 | case MAXWELL3D_REG_INDEX(macros.data): { | 202 | case MAXWELL3D_REG_INDEX(macros.data): { |
| 200 | ProcessMacroUpload(arg); | 203 | macro_engine->AddCode(regs.macros.upload_address, arg); |
| 201 | break; | 204 | break; |
| 202 | } | 205 | } |
| 203 | case MAXWELL3D_REG_INDEX(macros.bind): { | 206 | case MAXWELL3D_REG_INDEX(macros.bind): { |
| @@ -306,7 +309,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, | |||
| 306 | 309 | ||
| 307 | // Call the macro when there are no more parameters in the command buffer | 310 | // Call the macro when there are no more parameters in the command buffer |
| 308 | if (amount == methods_pending) { | 311 | if (amount == methods_pending) { |
| 309 | CallMacroMethod(executing_macro, macro_params.size(), macro_params.data()); | 312 | CallMacroMethod(executing_macro, macro_params); |
| 310 | macro_params.clear(); | 313 | macro_params.clear(); |
| 311 | } | 314 | } |
| 312 | return; | 315 | return; |
| @@ -420,9 +423,7 @@ void Maxwell3D::FlushMMEInlineDraw() { | |||
| 420 | } | 423 | } |
| 421 | 424 | ||
| 422 | void Maxwell3D::ProcessMacroUpload(u32 data) { | 425 | void Maxwell3D::ProcessMacroUpload(u32 data) { |
| 423 | ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), | 426 | macro_engine->AddCode(regs.macros.upload_address++, data); |
| 424 | "upload_address exceeded macro_memory size!"); | ||
| 425 | macro_memory[regs.macros.upload_address++] = data; | ||
| 426 | } | 427 | } |
| 427 | 428 | ||
| 428 | void Maxwell3D::ProcessMacroBind(u32 data) { | 429 | void Maxwell3D::ProcessMacroBind(u32 data) { |
| @@ -457,8 +458,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| 457 | 458 | ||
| 458 | void Maxwell3D::ProcessQueryGet() { | 459 | void Maxwell3D::ProcessQueryGet() { |
| 459 | // TODO(Subv): Support the other query units. | 460 | // TODO(Subv): Support the other query units. |
| 460 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | 461 | if (regs.query.query_get.unit != Regs::QueryUnit::Crop) { |
| 461 | "Units other than CROP are unimplemented"); | 462 | LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented"); |
| 463 | } | ||
| 462 | 464 | ||
| 463 | switch (regs.query.query_get.operation) { | 465 | switch (regs.query.query_get.operation) { |
| 464 | case Regs::QueryOperation::Release: | 466 | case Regs::QueryOperation::Release: |
| @@ -534,8 +536,8 @@ void Maxwell3D::ProcessCounterReset() { | |||
| 534 | rasterizer.ResetCounter(QueryType::SamplesPassed); | 536 | rasterizer.ResetCounter(QueryType::SamplesPassed); |
| 535 | break; | 537 | break; |
| 536 | default: | 538 | default: |
| 537 | LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", | 539 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", |
| 538 | static_cast<int>(regs.counter_reset)); | 540 | static_cast<int>(regs.counter_reset)); |
| 539 | break; | 541 | break; |
| 540 | } | 542 | } |
| 541 | } | 543 | } |
| @@ -592,8 +594,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { | |||
| 592 | system.GPU().GetTicks()); | 594 | system.GPU().GetTicks()); |
| 593 | return {}; | 595 | return {}; |
| 594 | default: | 596 | default: |
| 595 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | 597 | LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", |
| 596 | static_cast<u32>(regs.query.query_get.select.Value())); | 598 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 597 | return 1; | 599 | return 1; |
| 598 | } | 600 | } |
| 599 | } | 601 | } |
| @@ -738,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | |||
| 738 | const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | 740 | const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; |
| 739 | const auto& tex_info_buffer = shader.const_buffers[const_buffer]; | 741 | const auto& tex_info_buffer = shader.const_buffers[const_buffer]; |
| 740 | const GPUVAddr tex_info_address = tex_info_buffer.address + offset; | 742 | const GPUVAddr tex_info_address = tex_info_buffer.address + offset; |
| 743 | return AccessSampler(memory_manager.Read<u32>(tex_info_address)); | ||
| 744 | } | ||
| 741 | 745 | ||
| 742 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | 746 | SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { |
| 747 | const Texture::TextureHandle tex_handle{handle}; | ||
| 743 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 748 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); |
| 744 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); | 749 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); |
| 745 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 750 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); |
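The hunk above splits bindless sampler access in two: read the 32-bit handle out of the const buffer, then decode it through the new shared AccessSampler() path. A minimal sketch of the resulting flow, using the same members the diff touches (values illustrative):

    // Read the raw texture handle the shader referenced, then decode it
    // via the shared TIC/TSC lookup.
    const u32 handle = memory_manager.Read<u32>(tex_info_address);
    const SamplerDescriptor desc = AccessSampler(handle);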
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 05dd6b39b..d5fe25065 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #include "video_core/engines/engine_upload.h" | 23 | #include "video_core/engines/engine_upload.h" |
| 24 | #include "video_core/engines/shader_type.h" | 24 | #include "video_core/engines/shader_type.h" |
| 25 | #include "video_core/gpu.h" | 25 | #include "video_core/gpu.h" |
| 26 | #include "video_core/macro_interpreter.h" | 26 | #include "video_core/macro/macro.h" |
| 27 | #include "video_core/textures/texture.h" | 27 | #include "video_core/textures/texture.h" |
| 28 | 28 | ||
| 29 | namespace Core { | 29 | namespace Core { |
| @@ -598,6 +598,7 @@ public: | |||
| 598 | BitField<4, 3, u32> block_height; | 598 | BitField<4, 3, u32> block_height; |
| 599 | BitField<8, 3, u32> block_depth; | 599 | BitField<8, 3, u32> block_depth; |
| 600 | BitField<12, 1, InvMemoryLayout> type; | 600 | BitField<12, 1, InvMemoryLayout> type; |
| 601 | BitField<16, 1, u32> is_3d; | ||
| 601 | } memory_layout; | 602 | } memory_layout; |
| 602 | union { | 603 | union { |
| 603 | BitField<0, 16, u32> layers; | 604 | BitField<0, 16, u32> layers; |
| @@ -1403,6 +1404,8 @@ public: | |||
| 1403 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | 1404 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |
| 1404 | u64 offset) const override; | 1405 | u64 offset) const override; |
| 1405 | 1406 | ||
| 1407 | SamplerDescriptor AccessSampler(u32 handle) const override; | ||
| 1408 | |||
| 1406 | u32 GetBoundBuffer() const override { | 1409 | u32 GetBoundBuffer() const override { |
| 1407 | return regs.tex_cb_index; | 1410 | return regs.tex_cb_index; |
| 1408 | } | 1411 | } |
| @@ -1411,15 +1414,6 @@ public: | |||
| 1411 | 1414 | ||
| 1412 | const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; | 1415 | const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; |
| 1413 | 1416 | ||
| 1414 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | ||
| 1415 | /// we've seen used. | ||
| 1416 | using MacroMemory = std::array<u32, 0x40000>; | ||
| 1417 | |||
| 1418 | /// Gets a reference to macro memory. | ||
| 1419 | const MacroMemory& GetMacroMemory() const { | ||
| 1420 | return macro_memory; | ||
| 1421 | } | ||
| 1422 | |||
| 1423 | bool ShouldExecute() const { | 1417 | bool ShouldExecute() const { |
| 1424 | return execute_on; | 1418 | return execute_on; |
| 1425 | } | 1419 | } |
| @@ -1468,16 +1462,13 @@ private: | |||
| 1468 | 1462 | ||
| 1469 | std::array<bool, Regs::NUM_REGS> mme_inline{}; | 1463 | std::array<bool, Regs::NUM_REGS> mme_inline{}; |
| 1470 | 1464 | ||
| 1471 | /// Memory for macro code | ||
| 1472 | MacroMemory macro_memory; | ||
| 1473 | |||
| 1474 | /// Macro method that is currently being executed / being fed parameters. | 1465 | /// Macro method that is currently being executed / being fed parameters. |
| 1475 | u32 executing_macro = 0; | 1466 | u32 executing_macro = 0; |
| 1476 | /// Parameters that have been submitted to the macro call so far. | 1467 | /// Parameters that have been submitted to the macro call so far. |
| 1477 | std::vector<u32> macro_params; | 1468 | std::vector<u32> macro_params; |
| 1478 | 1469 | ||
| 1479 | /// Interpreter for the macro codes uploaded to the GPU. | 1470 | /// Engine used to execute the macro codes uploaded to the GPU. |
| 1480 | MacroInterpreter macro_interpreter; | 1471 | std::unique_ptr<MacroEngine> macro_engine; |
| 1481 | 1472 | ||
| 1482 | static constexpr u32 null_cb_data = 0xFFFFFFFF; | 1473 | static constexpr u32 null_cb_data = 0xFFFFFFFF; |
| 1483 | struct { | 1474 | struct { |
| @@ -1506,7 +1497,7 @@ private: | |||
| 1506 | * @param num_parameters Number of arguments | 1497 | * @param num_parameters Number of arguments |
| 1507 | * @param parameters Arguments to the method call | 1498 | * @param parameters Arguments to the method call |
| 1508 | */ | 1499 | */ |
| 1509 | void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters); | 1500 | void CallMacroMethod(u32 method, const std::vector<u32>& parameters); |
| 1510 | 1501 | ||
| 1511 | /// Handles writes to the macro uploading register. | 1502 | /// Handles writes to the macro uploading register. |
| 1512 | void ProcessMacroUpload(u32 data); | 1503 | void ProcessMacroUpload(u32 data); |
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp new file mode 100644 index 000000000..89077a2d8 --- /dev/null +++ b/src/video_core/macro/macro.cpp | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/logging/log.h" | ||
| 7 | #include "core/settings.h" | ||
| 8 | #include "video_core/macro/macro.h" | ||
| 9 | #include "video_core/macro/macro_interpreter.h" | ||
| 10 | #include "video_core/macro/macro_jit_x64.h" | ||
| 11 | |||
| 12 | namespace Tegra { | ||
| 13 | |||
| 14 | void MacroEngine::AddCode(u32 method, u32 data) { | ||
| 15 | uploaded_macro_code[method].push_back(data); | ||
| 16 | } | ||
| 17 | |||
| 18 | void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) { | ||
| 19 | auto compiled_macro = macro_cache.find(method); | ||
| 20 | if (compiled_macro != macro_cache.end()) { | ||
| 21 | compiled_macro->second->Execute(parameters, method); | ||
| 22 | } else { | ||
| 23 | // Macro not compiled, check if it's uploaded and if so, compile it | ||
| 24 | auto macro_code = uploaded_macro_code.find(method); | ||
| 25 | if (macro_code == uploaded_macro_code.end()) { | ||
| 26 | UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); | ||
| 27 | return; | ||
| 28 | } | ||
| 29 | macro_cache[method] = Compile(macro_code->second); | ||
| 30 | macro_cache[method]->Execute(parameters, method); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d) { | ||
| 35 | if (Settings::values.disable_macro_jit) { | ||
| 36 | return std::make_unique<MacroInterpreter>(maxwell3d); | ||
| 37 | } | ||
| 38 | #ifdef ARCHITECTURE_x86_64 | ||
| 39 | return std::make_unique<MacroJITx64>(maxwell3d); | ||
| 40 | #else | ||
| 41 | return std::make_unique<MacroInterpreter>(maxwell3d); | ||
| 42 | #endif | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace Tegra | ||
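Putting macro.cpp together with the Maxwell3D changes above: every word written to macros.data lands in AddCode(), and the first Execute() for a method compiles and caches the program. A minimal driving sketch, assuming a Maxwell3D reference and illustrative names (macro_words, start_method, parameters):

    // JIT on x86_64 unless Settings::values.disable_macro_jit is set.
    auto engine = Tegra::GetMacroEngine(maxwell3d);
    for (const u32 word : macro_words) {
        engine->AddCode(start_method, word); // buffer the macro's code words
    }
    // The first call compiles (interpreter or JIT) and caches the result;
    // later calls hit the cache directly.
    engine->Execute(start_method, parameters);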
diff --git a/src/video_core/macro/macro.h b/src/video_core/macro/macro.h new file mode 100644 index 000000000..b76ed891f --- /dev/null +++ b/src/video_core/macro/macro.h | |||
| @@ -0,0 +1,128 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <vector> | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | |||
| 13 | namespace Tegra { | ||
| 14 | namespace Engines { | ||
| 15 | class Maxwell3D; | ||
| 16 | } | ||
| 17 | namespace Macro { | ||
| 18 | constexpr std::size_t NUM_MACRO_REGISTERS = 8; | ||
| 19 | enum class Operation : u32 { | ||
| 20 | ALU = 0, | ||
| 21 | AddImmediate = 1, | ||
| 22 | ExtractInsert = 2, | ||
| 23 | ExtractShiftLeftImmediate = 3, | ||
| 24 | ExtractShiftLeftRegister = 4, | ||
| 25 | Read = 5, | ||
| 26 | Unused = 6, // This operation doesn't seem to be a valid encoding. | ||
| 27 | Branch = 7, | ||
| 28 | }; | ||
| 29 | |||
| 30 | enum class ALUOperation : u32 { | ||
| 31 | Add = 0, | ||
| 32 | AddWithCarry = 1, | ||
| 33 | Subtract = 2, | ||
| 34 | SubtractWithBorrow = 3, | ||
| 35 | // Operations 4-7 don't seem to be valid encodings. | ||
| 36 | Xor = 8, | ||
| 37 | Or = 9, | ||
| 38 | And = 10, | ||
| 39 | AndNot = 11, | ||
| 40 | Nand = 12 | ||
| 41 | }; | ||
| 42 | |||
| 43 | enum class ResultOperation : u32 { | ||
| 44 | IgnoreAndFetch = 0, | ||
| 45 | Move = 1, | ||
| 46 | MoveAndSetMethod = 2, | ||
| 47 | FetchAndSend = 3, | ||
| 48 | MoveAndSend = 4, | ||
| 49 | FetchAndSetMethod = 5, | ||
| 50 | MoveAndSetMethodFetchAndSend = 6, | ||
| 51 | MoveAndSetMethodSend = 7 | ||
| 52 | }; | ||
| 53 | |||
| 54 | enum class BranchCondition : u32 { | ||
| 55 | Zero = 0, | ||
| 56 | NotZero = 1, | ||
| 57 | }; | ||
| 58 | |||
| 59 | union Opcode { | ||
| 60 | u32 raw; | ||
| 61 | BitField<0, 3, Operation> operation; | ||
| 62 | BitField<4, 3, ResultOperation> result_operation; | ||
| 63 | BitField<4, 1, BranchCondition> branch_condition; | ||
| 64 | // If set on a branch, then the branch doesn't have a delay slot. | ||
| 65 | BitField<5, 1, u32> branch_annul; | ||
| 66 | BitField<7, 1, u32> is_exit; | ||
| 67 | BitField<8, 3, u32> dst; | ||
| 68 | BitField<11, 3, u32> src_a; | ||
| 69 | BitField<14, 3, u32> src_b; | ||
| 70 | // The signed immediate overlaps the second source operand and the alu operation. | ||
| 71 | BitField<14, 18, s32> immediate; | ||
| 72 | |||
| 73 | BitField<17, 5, ALUOperation> alu_operation; | ||
| 74 | |||
| 75 | // Bitfield instructions data | ||
| 76 | BitField<17, 5, u32> bf_src_bit; | ||
| 77 | BitField<22, 5, u32> bf_size; | ||
| 78 | BitField<27, 5, u32> bf_dst_bit; | ||
| 79 | |||
| 80 | u32 GetBitfieldMask() const { | ||
| 81 | return (1 << bf_size) - 1; | ||
| 82 | } | ||
| 83 | |||
| 84 | s32 GetBranchTarget() const { | ||
| 85 | return static_cast<s32>(immediate * sizeof(u32)); | ||
| 86 | } | ||
| 87 | }; | ||
| 88 | |||
| 89 | union MethodAddress { | ||
| 90 | u32 raw; | ||
| 91 | BitField<0, 12, u32> address; | ||
| 92 | BitField<12, 6, u32> increment; | ||
| 93 | }; | ||
| 94 | |||
| 95 | } // namespace Macro | ||
| 96 | |||
| 97 | class CachedMacro { | ||
| 98 | public: | ||
| 99 | virtual ~CachedMacro() = default; | ||
| 100 | /** | ||
| 101 | * Executes the macro code with the specified input parameters. | ||
| 102 | * @param code The macro byte code to execute | ||
| 103 | * @param parameters The parameters of the macro | ||
| 104 | */ | ||
| 105 | virtual void Execute(const std::vector<u32>& parameters, u32 method) = 0; | ||
| 106 | }; | ||
| 107 | |||
| 108 | class MacroEngine { | ||
| 109 | public: | ||
| 110 | virtual ~MacroEngine() = default; | ||
| 111 | |||
| 112 | // Stores the uploaded macro code so it can be compiled when it's called. | ||
| 113 | void AddCode(u32 method, u32 data); | ||
| 114 | |||
| 115 | // Compiles the macro if it's not in the cache, then executes the compiled macro. | ||
| 116 | void Execute(u32 method, const std::vector<u32>& parameters); | ||
| 117 | |||
| 118 | protected: | ||
| 119 | virtual std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) = 0; | ||
| 120 | |||
| 121 | private: | ||
| 122 | std::unordered_map<u32, std::unique_ptr<CachedMacro>> macro_cache; | ||
| 123 | std::unordered_map<u32, std::vector<u32>> uploaded_macro_code; | ||
| 124 | }; | ||
| 125 | |||
| 126 | std::unique_ptr<MacroEngine> GetMacroEngine(Engines::Maxwell3D& maxwell3d); | ||
| 127 | |||
| 128 | } // namespace Tegra | ||
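With the encoding centralized here, a raw macro word can be decoded directly through the Opcode union. A hedged example; the word below is constructed for illustration, not taken from a real macro:

    Tegra::Macro::Opcode op{};
    op.raw = 0x00000011; // bits 0-2: Operation::AddImmediate, bits 4-6: ResultOperation::Move
    if (op.operation == Tegra::Macro::Operation::AddImmediate) {
        const u32 dst = op.dst;       // destination register, bits 8-10 (0 here)
        const s32 imm = op.immediate; // signed immediate, bits 14-31 (0 here)
    }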
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index 947364928..5edff27aa 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | 1 | // Copyright 2020 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| @@ -6,109 +6,46 @@ | |||
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/microprofile.h" | 7 | #include "common/microprofile.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/macro_interpreter.h" | 9 | #include "video_core/macro/macro_interpreter.h" |
| 10 | 10 | ||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); |
| 12 | 12 | ||
| 13 | namespace Tegra { | 13 | namespace Tegra { |
| 14 | namespace { | 14 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} |
| 15 | enum class Operation : u32 { | ||
| 16 | ALU = 0, | ||
| 17 | AddImmediate = 1, | ||
| 18 | ExtractInsert = 2, | ||
| 19 | ExtractShiftLeftImmediate = 3, | ||
| 20 | ExtractShiftLeftRegister = 4, | ||
| 21 | Read = 5, | ||
| 22 | Unused = 6, // This operation doesn't seem to be a valid encoding. | ||
| 23 | Branch = 7, | ||
| 24 | }; | ||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 27 | enum class MacroInterpreter::ALUOperation : u32 { | ||
| 28 | Add = 0, | ||
| 29 | AddWithCarry = 1, | ||
| 30 | Subtract = 2, | ||
| 31 | SubtractWithBorrow = 3, | ||
| 32 | // Operations 4-7 don't seem to be valid encodings. | ||
| 33 | Xor = 8, | ||
| 34 | Or = 9, | ||
| 35 | And = 10, | ||
| 36 | AndNot = 11, | ||
| 37 | Nand = 12 | ||
| 38 | }; | ||
| 39 | |||
| 40 | enum class MacroInterpreter::ResultOperation : u32 { | ||
| 41 | IgnoreAndFetch = 0, | ||
| 42 | Move = 1, | ||
| 43 | MoveAndSetMethod = 2, | ||
| 44 | FetchAndSend = 3, | ||
| 45 | MoveAndSend = 4, | ||
| 46 | FetchAndSetMethod = 5, | ||
| 47 | MoveAndSetMethodFetchAndSend = 6, | ||
| 48 | MoveAndSetMethodSend = 7 | ||
| 49 | }; | ||
| 50 | |||
| 51 | enum class MacroInterpreter::BranchCondition : u32 { | ||
| 52 | Zero = 0, | ||
| 53 | NotZero = 1, | ||
| 54 | }; | ||
| 55 | |||
| 56 | union MacroInterpreter::Opcode { | ||
| 57 | u32 raw; | ||
| 58 | BitField<0, 3, Operation> operation; | ||
| 59 | BitField<4, 3, ResultOperation> result_operation; | ||
| 60 | BitField<4, 1, BranchCondition> branch_condition; | ||
| 61 | // If set on a branch, then the branch doesn't have a delay slot. | ||
| 62 | BitField<5, 1, u32> branch_annul; | ||
| 63 | BitField<7, 1, u32> is_exit; | ||
| 64 | BitField<8, 3, u32> dst; | ||
| 65 | BitField<11, 3, u32> src_a; | ||
| 66 | BitField<14, 3, u32> src_b; | ||
| 67 | // The signed immediate overlaps the second source operand and the alu operation. | ||
| 68 | BitField<14, 18, s32> immediate; | ||
| 69 | |||
| 70 | BitField<17, 5, ALUOperation> alu_operation; | ||
| 71 | |||
| 72 | // Bitfield instructions data | ||
| 73 | BitField<17, 5, u32> bf_src_bit; | ||
| 74 | BitField<22, 5, u32> bf_size; | ||
| 75 | BitField<27, 5, u32> bf_dst_bit; | ||
| 76 | |||
| 77 | u32 GetBitfieldMask() const { | ||
| 78 | return (1 << bf_size) - 1; | ||
| 79 | } | ||
| 80 | 15 | ||
| 81 | s32 GetBranchTarget() const { | 16 | std::unique_ptr<CachedMacro> MacroInterpreter::Compile(const std::vector<u32>& code) { |
| 82 | return static_cast<s32>(immediate * sizeof(u32)); | 17 | return std::make_unique<MacroInterpreterImpl>(maxwell3d, code); |
| 83 | } | 18 | } |
| 84 | }; | ||
| 85 | 19 | ||
| 86 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | 20 | MacroInterpreterImpl::MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, |
| 21 | const std::vector<u32>& code) | ||
| 22 | : maxwell3d(maxwell3d), code(code) {} | ||
| 87 | 23 | ||
| 88 | void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) { | 24 | void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 method) { |
| 89 | MICROPROFILE_SCOPE(MacroInterp); | 25 | MICROPROFILE_SCOPE(MacroInterp); |
| 90 | Reset(); | 26 | Reset(); |
| 91 | 27 | ||
| 92 | registers[1] = parameters[0]; | 28 | registers[1] = parameters[0]; |
| 29 | num_parameters = parameters.size(); | ||
| 93 | 30 | ||
| 94 | if (num_parameters > parameters_capacity) { | 31 | if (num_parameters > parameters_capacity) { |
| 95 | parameters_capacity = num_parameters; | 32 | parameters_capacity = num_parameters; |
| 96 | this->parameters = std::make_unique<u32[]>(num_parameters); | 33 | this->parameters = std::make_unique<u32[]>(num_parameters); |
| 97 | } | 34 | } |
| 98 | std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32)); | 35 | std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); |
| 99 | this->num_parameters = num_parameters; | 36 | this->num_parameters = num_parameters; |
| 100 | 37 | ||
| 101 | // Execute the code until we hit an exit condition. | 38 | // Execute the code until we hit an exit condition. |
| 102 | bool keep_executing = true; | 39 | bool keep_executing = true; |
| 103 | while (keep_executing) { | 40 | while (keep_executing) { |
| 104 | keep_executing = Step(offset, false); | 41 | keep_executing = Step(false); |
| 105 | } | 42 | } |
| 106 | 43 | ||
| 107 | // Assert that the macro used all the input parameters | 44 | // Assert that the macro used all the input parameters |
| 108 | ASSERT(next_parameter_index == num_parameters); | 45 | ASSERT(next_parameter_index == num_parameters); |
| 109 | } | 46 | } |
| 110 | 47 | ||
| 111 | void MacroInterpreter::Reset() { | 48 | void MacroInterpreterImpl::Reset() { |
| 112 | registers = {}; | 49 | registers = {}; |
| 113 | pc = 0; | 50 | pc = 0; |
| 114 | delayed_pc = {}; | 51 | delayed_pc = {}; |
| @@ -120,10 +57,10 @@ void MacroInterpreter::Reset() { | |||
| 120 | carry_flag = false; | 57 | carry_flag = false; |
| 121 | } | 58 | } |
| 122 | 59 | ||
| 123 | bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | 60 | bool MacroInterpreterImpl::Step(bool is_delay_slot) { |
| 124 | u32 base_address = pc; | 61 | u32 base_address = pc; |
| 125 | 62 | ||
| 126 | Opcode opcode = GetOpcode(offset); | 63 | Macro::Opcode opcode = GetOpcode(); |
| 127 | pc += 4; | 64 | pc += 4; |
| 128 | 65 | ||
| 129 | // Update the program counter if we were delayed | 66 | // Update the program counter if we were delayed |
| @@ -134,18 +71,18 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | |||
| 134 | } | 71 | } |
| 135 | 72 | ||
| 136 | switch (opcode.operation) { | 73 | switch (opcode.operation) { |
| 137 | case Operation::ALU: { | 74 | case Macro::Operation::ALU: { |
| 138 | u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), | 75 | u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), |
| 139 | GetRegister(opcode.src_b)); | 76 | GetRegister(opcode.src_b)); |
| 140 | ProcessResult(opcode.result_operation, opcode.dst, result); | 77 | ProcessResult(opcode.result_operation, opcode.dst, result); |
| 141 | break; | 78 | break; |
| 142 | } | 79 | } |
| 143 | case Operation::AddImmediate: { | 80 | case Macro::Operation::AddImmediate: { |
| 144 | ProcessResult(opcode.result_operation, opcode.dst, | 81 | ProcessResult(opcode.result_operation, opcode.dst, |
| 145 | GetRegister(opcode.src_a) + opcode.immediate); | 82 | GetRegister(opcode.src_a) + opcode.immediate); |
| 146 | break; | 83 | break; |
| 147 | } | 84 | } |
| 148 | case Operation::ExtractInsert: { | 85 | case Macro::Operation::ExtractInsert: { |
| 149 | u32 dst = GetRegister(opcode.src_a); | 86 | u32 dst = GetRegister(opcode.src_a); |
| 150 | u32 src = GetRegister(opcode.src_b); | 87 | u32 src = GetRegister(opcode.src_b); |
| 151 | 88 | ||
| @@ -155,7 +92,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | |||
| 155 | ProcessResult(opcode.result_operation, opcode.dst, dst); | 92 | ProcessResult(opcode.result_operation, opcode.dst, dst); |
| 156 | break; | 93 | break; |
| 157 | } | 94 | } |
| 158 | case Operation::ExtractShiftLeftImmediate: { | 95 | case Macro::Operation::ExtractShiftLeftImmediate: { |
| 159 | u32 dst = GetRegister(opcode.src_a); | 96 | u32 dst = GetRegister(opcode.src_a); |
| 160 | u32 src = GetRegister(opcode.src_b); | 97 | u32 src = GetRegister(opcode.src_b); |
| 161 | 98 | ||
| @@ -164,7 +101,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | |||
| 164 | ProcessResult(opcode.result_operation, opcode.dst, result); | 101 | ProcessResult(opcode.result_operation, opcode.dst, result); |
| 165 | break; | 102 | break; |
| 166 | } | 103 | } |
| 167 | case Operation::ExtractShiftLeftRegister: { | 104 | case Macro::Operation::ExtractShiftLeftRegister: { |
| 168 | u32 dst = GetRegister(opcode.src_a); | 105 | u32 dst = GetRegister(opcode.src_a); |
| 169 | u32 src = GetRegister(opcode.src_b); | 106 | u32 src = GetRegister(opcode.src_b); |
| 170 | 107 | ||
| @@ -173,12 +110,12 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | |||
| 173 | ProcessResult(opcode.result_operation, opcode.dst, result); | 110 | ProcessResult(opcode.result_operation, opcode.dst, result); |
| 174 | break; | 111 | break; |
| 175 | } | 112 | } |
| 176 | case Operation::Read: { | 113 | case Macro::Operation::Read: { |
| 177 | u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); | 114 | u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); |
| 178 | ProcessResult(opcode.result_operation, opcode.dst, result); | 115 | ProcessResult(opcode.result_operation, opcode.dst, result); |
| 179 | break; | 116 | break; |
| 180 | } | 117 | } |
| 181 | case Operation::Branch: { | 118 | case Macro::Operation::Branch: { |
| 182 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); | 119 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); |
| 183 | u32 value = GetRegister(opcode.src_a); | 120 | u32 value = GetRegister(opcode.src_a); |
| 184 | bool taken = EvaluateBranchCondition(opcode.branch_condition, value); | 121 | bool taken = EvaluateBranchCondition(opcode.branch_condition, value); |
| @@ -191,7 +128,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | |||
| 191 | 128 | ||
| 192 | delayed_pc = base_address + opcode.GetBranchTarget(); | 129 | delayed_pc = base_address + opcode.GetBranchTarget(); |
| 193 | // Execute one more instruction due to the delay slot. | 130 | // Execute one more instruction due to the delay slot. |
| 194 | return Step(offset, true); | 131 | return Step(true); |
| 195 | } | 132 | } |
| 196 | break; | 133 | break; |
| 197 | } | 134 | } |
| @@ -204,51 +141,44 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { | |||
| 204 | // cause an exit if it's executed inside a delay slot. | 141 | // cause an exit if it's executed inside a delay slot. |
| 205 | if (opcode.is_exit && !is_delay_slot) { | 142 | if (opcode.is_exit && !is_delay_slot) { |
| 206 | // Exit has a delay slot, execute the next instruction | 143 | // Exit has a delay slot, execute the next instruction |
| 207 | Step(offset, true); | 144 | Step(true); |
| 208 | return false; | 145 | return false; |
| 209 | } | 146 | } |
| 210 | 147 | ||
| 211 | return true; | 148 | return true; |
| 212 | } | 149 | } |
| 213 | 150 | ||
| 214 | MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const { | 151 | u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { |
| 215 | const auto& macro_memory{maxwell3d.GetMacroMemory()}; | ||
| 216 | ASSERT((pc % sizeof(u32)) == 0); | ||
| 217 | ASSERT((pc + offset) < macro_memory.size() * sizeof(u32)); | ||
| 218 | return {macro_memory[offset + pc / sizeof(u32)]}; | ||
| 219 | } | ||
| 220 | |||
| 221 | u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) { | ||
| 222 | switch (operation) { | 152 | switch (operation) { |
| 223 | case ALUOperation::Add: { | 153 | case Macro::ALUOperation::Add: { |
| 224 | const u64 result{static_cast<u64>(src_a) + src_b}; | 154 | const u64 result{static_cast<u64>(src_a) + src_b}; |
| 225 | carry_flag = result > 0xffffffff; | 155 | carry_flag = result > 0xffffffff; |
| 226 | return static_cast<u32>(result); | 156 | return static_cast<u32>(result); |
| 227 | } | 157 | } |
| 228 | case ALUOperation::AddWithCarry: { | 158 | case Macro::ALUOperation::AddWithCarry: { |
| 229 | const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; | 159 | const u64 result{static_cast<u64>(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; |
| 230 | carry_flag = result > 0xffffffff; | 160 | carry_flag = result > 0xffffffff; |
| 231 | return static_cast<u32>(result); | 161 | return static_cast<u32>(result); |
| 232 | } | 162 | } |
| 233 | case ALUOperation::Subtract: { | 163 | case Macro::ALUOperation::Subtract: { |
| 234 | const u64 result{static_cast<u64>(src_a) - src_b}; | 164 | const u64 result{static_cast<u64>(src_a) - src_b}; |
| 235 | carry_flag = result < 0x100000000; | 165 | carry_flag = result < 0x100000000; |
| 236 | return static_cast<u32>(result); | 166 | return static_cast<u32>(result); |
| 237 | } | 167 | } |
| 238 | case ALUOperation::SubtractWithBorrow: { | 168 | case Macro::ALUOperation::SubtractWithBorrow: { |
| 239 | const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)}; | 169 | const u64 result{static_cast<u64>(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)}; |
| 240 | carry_flag = result < 0x100000000; | 170 | carry_flag = result < 0x100000000; |
| 241 | return static_cast<u32>(result); | 171 | return static_cast<u32>(result); |
| 242 | } | 172 | } |
| 243 | case ALUOperation::Xor: | 173 | case Macro::ALUOperation::Xor: |
| 244 | return src_a ^ src_b; | 174 | return src_a ^ src_b; |
| 245 | case ALUOperation::Or: | 175 | case Macro::ALUOperation::Or: |
| 246 | return src_a | src_b; | 176 | return src_a | src_b; |
| 247 | case ALUOperation::And: | 177 | case Macro::ALUOperation::And: |
| 248 | return src_a & src_b; | 178 | return src_a & src_b; |
| 249 | case ALUOperation::AndNot: | 179 | case Macro::ALUOperation::AndNot: |
| 250 | return src_a & ~src_b; | 180 | return src_a & ~src_b; |
| 251 | case ALUOperation::Nand: | 181 | case Macro::ALUOperation::Nand: |
| 252 | return ~(src_a & src_b); | 182 | return ~(src_a & src_b); |
| 253 | 183 | ||
| 254 | default: | 184 | default: |
| @@ -257,43 +187,43 @@ u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) | |||
| 257 | } | 187 | } |
| 258 | } | 188 | } |
| 259 | 189 | ||
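The carry conventions in GetALUResult() above are easy to misread: Add and AddWithCarry set the flag when the 64-bit sum exceeds 0xffffffff, while Subtract and SubtractWithBorrow store carry as "no borrow occurred" (result < 0x100000000), and the borrow path subtracts !carry. A small worked check in plain C++ (illustrative, not yuzu code):

    const u64 sum = static_cast<u64>(0xffffffffU) + 1u; // 0x100000000
    const bool carry = sum > 0xffffffff;                // true: the add overflowed
    const u64 diff = static_cast<u64>(0u) - 1u;         // wraps to 2^64 - 1
    const bool no_borrow = diff < 0x100000000;          // false: a borrow occurred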
| 260 | void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 result) { | 190 | void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { |
| 261 | switch (operation) { | 191 | switch (operation) { |
| 262 | case ResultOperation::IgnoreAndFetch: | 192 | case Macro::ResultOperation::IgnoreAndFetch: |
| 263 | // Fetch parameter and ignore result. | 193 | // Fetch parameter and ignore result. |
| 264 | SetRegister(reg, FetchParameter()); | 194 | SetRegister(reg, FetchParameter()); |
| 265 | break; | 195 | break; |
| 266 | case ResultOperation::Move: | 196 | case Macro::ResultOperation::Move: |
| 267 | // Move result. | 197 | // Move result. |
| 268 | SetRegister(reg, result); | 198 | SetRegister(reg, result); |
| 269 | break; | 199 | break; |
| 270 | case ResultOperation::MoveAndSetMethod: | 200 | case Macro::ResultOperation::MoveAndSetMethod: |
| 271 | // Move result and use as Method Address. | 201 | // Move result and use as Method Address. |
| 272 | SetRegister(reg, result); | 202 | SetRegister(reg, result); |
| 273 | SetMethodAddress(result); | 203 | SetMethodAddress(result); |
| 274 | break; | 204 | break; |
| 275 | case ResultOperation::FetchAndSend: | 205 | case Macro::ResultOperation::FetchAndSend: |
| 276 | // Fetch parameter and send result. | 206 | // Fetch parameter and send result. |
| 277 | SetRegister(reg, FetchParameter()); | 207 | SetRegister(reg, FetchParameter()); |
| 278 | Send(result); | 208 | Send(result); |
| 279 | break; | 209 | break; |
| 280 | case ResultOperation::MoveAndSend: | 210 | case Macro::ResultOperation::MoveAndSend: |
| 281 | // Move and send result. | 211 | // Move and send result. |
| 282 | SetRegister(reg, result); | 212 | SetRegister(reg, result); |
| 283 | Send(result); | 213 | Send(result); |
| 284 | break; | 214 | break; |
| 285 | case ResultOperation::FetchAndSetMethod: | 215 | case Macro::ResultOperation::FetchAndSetMethod: |
| 286 | // Fetch parameter and use result as Method Address. | 216 | // Fetch parameter and use result as Method Address. |
| 287 | SetRegister(reg, FetchParameter()); | 217 | SetRegister(reg, FetchParameter()); |
| 288 | SetMethodAddress(result); | 218 | SetMethodAddress(result); |
| 289 | break; | 219 | break; |
| 290 | case ResultOperation::MoveAndSetMethodFetchAndSend: | 220 | case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: |
| 291 | // Move result and use as Method Address, then fetch and send parameter. | 221 | // Move result and use as Method Address, then fetch and send parameter. |
| 292 | SetRegister(reg, result); | 222 | SetRegister(reg, result); |
| 293 | SetMethodAddress(result); | 223 | SetMethodAddress(result); |
| 294 | Send(FetchParameter()); | 224 | Send(FetchParameter()); |
| 295 | break; | 225 | break; |
| 296 | case ResultOperation::MoveAndSetMethodSend: | 226 | case Macro::ResultOperation::MoveAndSetMethodSend: |
| 297 | // Move result and use as Method Address, then send bits 12:17 of result. | 227 | // Move result and use as Method Address, then send bits 12:17 of result. |
| 298 | SetRegister(reg, result); | 228 | SetRegister(reg, result); |
| 299 | SetMethodAddress(result); | 229 | SetMethodAddress(result); |
| @@ -304,16 +234,28 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res | |||
| 304 | } | 234 | } |
| 305 | } | 235 | } |
| 306 | 236 | ||
| 307 | u32 MacroInterpreter::FetchParameter() { | 237 | bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { |
| 308 | ASSERT(next_parameter_index < num_parameters); | 238 | switch (cond) { |
| 309 | return parameters[next_parameter_index++]; | 239 | case Macro::BranchCondition::Zero: |
| 240 | return value == 0; | ||
| 241 | case Macro::BranchCondition::NotZero: | ||
| 242 | return value != 0; | ||
| 243 | } | ||
| 244 | UNREACHABLE(); | ||
| 245 | return true; | ||
| 310 | } | 246 | } |
| 311 | 247 | ||
| 312 | u32 MacroInterpreter::GetRegister(u32 register_id) const { | 248 | Macro::Opcode MacroInterpreterImpl::GetOpcode() const { |
| 249 | ASSERT((pc % sizeof(u32)) == 0); | ||
| 250 | ASSERT(pc < code.size() * sizeof(u32)); | ||
| 251 | return {code[pc / sizeof(u32)]}; | ||
| 252 | } | ||
| 253 | |||
| 254 | u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { | ||
| 313 | return registers.at(register_id); | 255 | return registers.at(register_id); |
| 314 | } | 256 | } |
| 315 | 257 | ||
| 316 | void MacroInterpreter::SetRegister(u32 register_id, u32 value) { | 258 | void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { |
| 317 | // Register 0 is hardwired as the zero register. | 259 | // Register 0 is hardwired as the zero register. |
| 318 | // Ensure no writes to it actually occur. | 260 | // Ensure no writes to it actually occur. |
| 319 | if (register_id == 0) { | 261 | if (register_id == 0) { |
| @@ -323,30 +265,24 @@ void MacroInterpreter::SetRegister(u32 register_id, u32 value) { | |||
| 323 | registers.at(register_id) = value; | 265 | registers.at(register_id) = value; |
| 324 | } | 266 | } |
| 325 | 267 | ||
| 326 | void MacroInterpreter::SetMethodAddress(u32 address) { | 268 | void MacroInterpreterImpl::SetMethodAddress(u32 address) { |
| 327 | method_address.raw = address; | 269 | method_address.raw = address; |
| 328 | } | 270 | } |
| 329 | 271 | ||
| 330 | void MacroInterpreter::Send(u32 value) { | 272 | void MacroInterpreterImpl::Send(u32 value) { |
| 331 | maxwell3d.CallMethodFromMME(method_address.address, value); | 273 | maxwell3d.CallMethodFromMME(method_address.address, value); |
| 332 | // Increment the method address by the method increment. | 274 | // Increment the method address by the method increment. |
| 333 | method_address.address.Assign(method_address.address.Value() + | 275 | method_address.address.Assign(method_address.address.Value() + |
| 334 | method_address.increment.Value()); | 276 | method_address.increment.Value()); |
| 335 | } | 277 | } |
| 336 | 278 | ||
| 337 | u32 MacroInterpreter::Read(u32 method) const { | 279 | u32 MacroInterpreterImpl::Read(u32 method) const { |
| 338 | return maxwell3d.GetRegisterValue(method); | 280 | return maxwell3d.GetRegisterValue(method); |
| 339 | } | 281 | } |
| 340 | 282 | ||
| 341 | bool MacroInterpreter::EvaluateBranchCondition(BranchCondition cond, u32 value) const { | 283 | u32 MacroInterpreterImpl::FetchParameter() { |
| 342 | switch (cond) { | 284 | ASSERT(next_parameter_index < num_parameters); |
| 343 | case BranchCondition::Zero: | 285 | return parameters[next_parameter_index++]; |
| 344 | return value == 0; | ||
| 345 | case BranchCondition::NotZero: | ||
| 346 | return value != 0; | ||
| 347 | } | ||
| 348 | UNREACHABLE(); | ||
| 349 | return true; | ||
| 350 | } | 286 | } |
| 351 | 287 | ||
| 352 | } // namespace Tegra | 288 | } // namespace Tegra |
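Two details of the interpreter are worth keeping in mind: branch targets are relative to the address of the branch opcode itself, and a taken, non-annulled branch still executes one delay-slot instruction (the Step(true) call) before the jump lands. A hedged sketch of the target arithmetic with illustrative values:

    const s32 immediate = -2;               // Opcode::immediate of the branch
    const s32 offset_bytes = immediate * 4; // GetBranchTarget(): words to bytes
    const u32 base_address = 0x20;          // pc of the branch opcode itself
    const u32 target = base_address + offset_bytes; // 0x18, taken after the delay slot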
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h index 631146d89..90217fc89 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro/macro_interpreter.h | |||
| @@ -1,44 +1,37 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | 1 | // Copyright 2020 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | |||
| 7 | #include <array> | 6 | #include <array> |
| 8 | #include <optional> | 7 | #include <optional> |
| 9 | 8 | #include <vector> | |
| 10 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 11 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/macro/macro.h" | ||
| 12 | 12 | ||
| 13 | namespace Tegra { | 13 | namespace Tegra { |
| 14 | namespace Engines { | 14 | namespace Engines { |
| 15 | class Maxwell3D; | 15 | class Maxwell3D; |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | class MacroInterpreter final { | 18 | class MacroInterpreter final : public MacroEngine { |
| 19 | public: | 19 | public: |
| 20 | explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); | 20 | explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d); |
| 21 | 21 | ||
| 22 | /** | 22 | protected: |
| 23 | * Executes the macro code with the specified input parameters. | 23 | std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; |
| 24 | * @param offset Offset to start execution at. | ||
| 25 | * @param parameters The parameters of the macro. | ||
| 26 | */ | ||
| 27 | void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); | ||
| 28 | 24 | ||
| 29 | private: | 25 | private: |
| 30 | enum class ALUOperation : u32; | 26 | Engines::Maxwell3D& maxwell3d; |
| 31 | enum class BranchCondition : u32; | 27 | }; |
| 32 | enum class ResultOperation : u32; | ||
| 33 | |||
| 34 | union Opcode; | ||
| 35 | 28 | ||
| 36 | union MethodAddress { | 29 | class MacroInterpreterImpl : public CachedMacro { |
| 37 | u32 raw; | 30 | public: |
| 38 | BitField<0, 12, u32> address; | 31 | MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); |
| 39 | BitField<12, 6, u32> increment; | 32 | void Execute(const std::vector<u32>& parameters, u32 method) override; |
| 40 | }; | ||
| 41 | 33 | ||
| 34 | private: | ||
| 42 | /// Resets the execution engine state, zeroing registers, etc. | 35 | /// Resets the execution engine state, zeroing registers, etc. |
| 43 | void Reset(); | 36 | void Reset(); |
| 44 | 37 | ||
| @@ -49,20 +42,20 @@ private: | |||
| 49 | * @param is_delay_slot Whether the current step is being executed due to a delay slot in a | 42 | * @param is_delay_slot Whether the current step is being executed due to a delay slot in a |
| 50 | * previous instruction. | 43 | * previous instruction. |
| 51 | */ | 44 | */ |
| 52 | bool Step(u32 offset, bool is_delay_slot); | 45 | bool Step(bool is_delay_slot); |
| 53 | 46 | ||
| 54 | /// Calculates the result of an ALU operation. src_a OP src_b; | 47 | /// Calculates the result of an ALU operation. src_a OP src_b; |
| 55 | u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b); | 48 | u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); |
| 56 | 49 | ||
| 57 | /// Performs the result operation on the input result and stores it in the specified register | 50 | /// Performs the result operation on the input result and stores it in the specified register |
| 58 | /// (if necessary). | 51 | /// (if necessary). |
| 59 | void ProcessResult(ResultOperation operation, u32 reg, u32 result); | 52 | void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); |
| 60 | 53 | ||
| 61 | /// Evaluates the branch condition and returns whether the branch should be taken or not. | 54 | /// Evaluates the branch condition and returns whether the branch should be taken or not. |
| 62 | bool EvaluateBranchCondition(BranchCondition cond, u32 value) const; | 55 | bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; |
| 63 | 56 | ||
| 64 | /// Reads an opcode at the current program counter location. | 57 | /// Reads an opcode at the current program counter location. |
| 65 | Opcode GetOpcode(u32 offset) const; | 58 | Macro::Opcode GetOpcode() const; |
| 66 | 59 | ||
| 67 | /// Returns the specified register's value. Register 0 is hardcoded to always return 0. | 60 | /// Returns the specified register's value. Register 0 is hardcoded to always return 0. |
| 68 | u32 GetRegister(u32 register_id) const; | 61 | u32 GetRegister(u32 register_id) const; |
| @@ -89,13 +82,11 @@ private: | |||
| 89 | /// Program counter to execute at after the delay slot is executed. | 82 | /// Program counter to execute at after the delay slot is executed. |
| 90 | std::optional<u32> delayed_pc; | 83 | std::optional<u32> delayed_pc; |
| 91 | 84 | ||
| 92 | static constexpr std::size_t NumMacroRegisters = 8; | ||
| 93 | |||
| 94 | /// General purpose macro registers. | 85 | /// General purpose macro registers. |
| 95 | std::array<u32, NumMacroRegisters> registers = {}; | 86 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers = {}; |
| 96 | 87 | ||
| 97 | /// Method address to use for the next Send instruction. | 88 | /// Method address to use for the next Send instruction. |
| 98 | MethodAddress method_address = {}; | 89 | Macro::MethodAddress method_address = {}; |
| 99 | 90 | ||
| 100 | /// Input parameters of the current macro. | 91 | /// Input parameters of the current macro. |
| 101 | std::unique_ptr<u32[]> parameters; | 92 | std::unique_ptr<u32[]> parameters; |
| @@ -105,5 +96,7 @@ private: | |||
| 105 | u32 next_parameter_index = 0; | 96 | u32 next_parameter_index = 0; |
| 106 | 97 | ||
| 107 | bool carry_flag = false; | 98 | bool carry_flag = false; |
| 99 | const std::vector<u32>& code; | ||
| 108 | }; | 100 | }; |
| 101 | |||
| 109 | } // namespace Tegra | 102 | } // namespace Tegra |
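This header shows the pattern any backend follows: a MacroEngine subclass that owns compilation, and a CachedMacro subclass per compiled program. A minimal sketch of a hypothetical no-op backend against these interfaces (NullEngine and NullMacro are illustrative names, not part of yuzu; includes as in macro.h):

    class NullMacro final : public Tegra::CachedMacro {
    public:
        void Execute(const std::vector<u32>& parameters, u32 method) override {}
    };

    class NullEngine final : public Tegra::MacroEngine {
    protected:
        std::unique_ptr<Tegra::CachedMacro> Compile(const std::vector<u32>& code) override {
            return std::make_unique<NullMacro>();
        }
    };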
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp new file mode 100644 index 000000000..11c1cc3be --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.cpp | |||
| @@ -0,0 +1,640 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/logging/log.h" | ||
| 7 | #include "common/microprofile.h" | ||
| 8 | #include "common/x64/xbyak_util.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/macro/macro_interpreter.h" | ||
| 11 | #include "video_core/macro/macro_jit_x64.h" | ||
| 12 | |||
| 13 | MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255, 47)); | ||
| 14 | MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); | ||
| 15 | |||
| 16 | namespace Tegra { | ||
| 17 | static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r9; | ||
| 18 | static const Xbyak::Reg64 REGISTERS = Xbyak::util::r10; | ||
| 19 | static const Xbyak::Reg64 STATE = Xbyak::util::r11; | ||
| 20 | static const Xbyak::Reg64 NEXT_PARAMETER = Xbyak::util::r12; | ||
| 21 | static const Xbyak::Reg32 RESULT = Xbyak::util::r13d; | ||
| 22 | static const Xbyak::Reg64 RESULT_64 = Xbyak::util::r13; | ||
| 23 | static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; | ||
| 24 | static const Xbyak::Reg64 METHOD_ADDRESS_64 = Xbyak::util::r14; | ||
| 25 | static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; | ||
| 26 | |||
| 27 | static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ | ||
| 28 | PARAMETERS, | ||
| 29 | REGISTERS, | ||
| 30 | STATE, | ||
| 31 | NEXT_PARAMETER, | ||
| 32 | RESULT, | ||
| 33 | METHOD_ADDRESS, | ||
| 34 | BRANCH_HOLDER, | ||
| 35 | }); | ||
| 36 | |||
| 37 | MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | ||
| 38 | |||
| 39 | std::unique_ptr<CachedMacro> MacroJITx64::Compile(const std::vector<u32>& code) { | ||
| 40 | return std::make_unique<MacroJITx64Impl>(maxwell3d, code); | ||
| 41 | } | ||
| 42 | |||
| 43 | MacroJITx64Impl::MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code) | ||
| 44 | : Xbyak::CodeGenerator(MAX_CODE_SIZE), code(code), maxwell3d(maxwell3d) { | ||
| 45 | Compile(); | ||
| 46 | } | ||
| 47 | |||
| 48 | MacroJITx64Impl::~MacroJITx64Impl() = default; | ||
| 49 | |||
| 50 | void MacroJITx64Impl::Execute(const std::vector<u32>& parameters, u32 method) { | ||
| 51 | MICROPROFILE_SCOPE(MacroJitExecute); | ||
| 52 | ASSERT_OR_EXECUTE(program != nullptr, { return; }); | ||
| 53 | JITState state{}; | ||
| 54 | state.maxwell3d = &maxwell3d; | ||
| 55 | state.registers = {}; | ||
| 56 | state.parameters = parameters.data(); | ||
| 57 | program(&state); | ||
| 58 | } | ||
| 59 | |||
| 60 | void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { | ||
| 61 | const bool is_a_zero = opcode.src_a == 0; | ||
| 62 | const bool is_b_zero = opcode.src_b == 0; | ||
| 63 | const bool valid_operation = !is_a_zero && !is_b_zero; | ||
| 64 | const bool is_move_operation = !is_a_zero && is_b_zero; | ||
| 65 | const bool has_zero_register = is_a_zero || is_b_zero; | ||
| 66 | |||
| 67 | Xbyak::Reg64 src_a; | ||
| 68 | Xbyak::Reg32 src_b; | ||
| 69 | |||
| 70 | if (!optimizer.zero_reg_skip) { | ||
| 71 | src_a = Compile_GetRegister(opcode.src_a, RESULT_64); | ||
| 72 | src_b = Compile_GetRegister(opcode.src_b, ebx); | ||
| 73 | } else { | ||
| 74 | if (!is_a_zero) { | ||
| 75 | src_a = Compile_GetRegister(opcode.src_a, RESULT_64); | ||
| 76 | } | ||
| 77 | if (!is_b_zero) { | ||
| 78 | src_b = Compile_GetRegister(opcode.src_b, ebx); | ||
| 79 | } | ||
| 80 | } | ||
| 81 | Xbyak::Label skip_carry{}; | ||
| 82 | |||
| 83 | bool has_emitted = false; | ||
| 84 | |||
| 85 | switch (opcode.alu_operation) { | ||
| 86 | case Macro::ALUOperation::Add: | ||
| 87 | if (optimizer.zero_reg_skip) { | ||
| 88 | if (valid_operation) { | ||
| 89 | add(src_a, src_b); | ||
| 90 | } | ||
| 91 | } else { | ||
| 92 | add(src_a, src_b); | ||
| 93 | } | ||
| 94 | |||
| 95 | if (!optimizer.can_skip_carry) { | ||
| 96 | setc(byte[STATE + offsetof(JITState, carry_flag)]); | ||
| 97 | } | ||
| 98 | break; | ||
| 99 | case Macro::ALUOperation::AddWithCarry: | ||
| 100 | bt(dword[STATE + offsetof(JITState, carry_flag)], 0); | ||
| 101 | adc(src_a, src_b); | ||
| 102 | setc(byte[STATE + offsetof(JITState, carry_flag)]); | ||
| 103 | break; | ||
| 104 | case Macro::ALUOperation::Subtract: | ||
| 105 | if (optimizer.zero_reg_skip) { | ||
| 106 | if (valid_operation) { | ||
| 107 | sub(src_a, src_b); | ||
| 108 | has_emitted = true; | ||
| 109 | } | ||
| 110 | } else { | ||
| 111 | sub(src_a, src_b); | ||
| 112 | has_emitted = true; | ||
| 113 | } | ||
| 114 | if (!optimizer.can_skip_carry && has_emitted) { | ||
| 115 | setc(byte[STATE + offsetof(JITState, carry_flag)]); | ||
| 116 | } | ||
| 117 | break; | ||
| 118 | case Macro::ALUOperation::SubtractWithBorrow: | ||
| 119 | bt(dword[STATE + offsetof(JITState, carry_flag)], 0); | ||
| 120 | sbb(src_a, src_b); | ||
| 121 | setc(byte[STATE + offsetof(JITState, carry_flag)]); | ||
| 122 | break; | ||
| 123 | case Macro::ALUOperation::Xor: | ||
| 124 | if (optimizer.zero_reg_skip) { | ||
| 125 | if (valid_operation) { | ||
| 126 | xor_(src_a, src_b); | ||
| 127 | } | ||
| 128 | } else { | ||
| 129 | xor_(src_a, src_b); | ||
| 130 | } | ||
| 131 | break; | ||
| 132 | case Macro::ALUOperation::Or: | ||
| 133 | if (optimizer.zero_reg_skip) { | ||
| 134 | if (valid_operation) { | ||
| 135 | or_(src_a, src_b); | ||
| 136 | } | ||
| 137 | } else { | ||
| 138 | or_(src_a, src_b); | ||
| 139 | } | ||
| 140 | break; | ||
| 141 | case Macro::ALUOperation::And: | ||
| 142 | if (optimizer.zero_reg_skip) { | ||
| 143 | if (!has_zero_register) { | ||
| 144 | and_(src_a, src_b); | ||
| 145 | } | ||
| 146 | } else { | ||
| 147 | and_(src_a, src_b); | ||
| 148 | } | ||
| 149 | break; | ||
| 150 | case Macro::ALUOperation::AndNot: | ||
| 151 | if (optimizer.zero_reg_skip) { | ||
| 152 | if (!is_a_zero) { | ||
| 153 | not_(src_b); | ||
| 154 | and_(src_a, src_b); | ||
| 155 | } | ||
| 156 | } else { | ||
| 157 | not_(src_b); | ||
| 158 | and_(src_a, src_b); | ||
| 159 | } | ||
| 160 | break; | ||
| 161 | case Macro::ALUOperation::Nand: | ||
| 162 | if (optimizer.zero_reg_skip) { | ||
| 163 | if (!is_a_zero) { | ||
| 164 | and_(src_a, src_b); | ||
| 165 | not_(src_a); | ||
| 166 | } | ||
| 167 | } else { | ||
| 168 | and_(src_a, src_b); | ||
| 169 | not_(src_a); | ||
| 170 | } | ||
| 171 | break; | ||
| 172 | default: | ||
| 173 | UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", | ||
| 174 | static_cast<std::size_t>(opcode.alu_operation.Value())); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 178 | } | ||
| 179 | |||
| 180 | void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { | ||
| 181 | if (optimizer.skip_dummy_addimmediate) { | ||
| 182 | // Games tend to use this as an exit instruction placeholder: it encodes an instruction | ||
| 183 | // that does nothing. In our case we can simply not emit anything. | ||
| 184 | if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { | ||
| 185 | return; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | // Check for redundant moves | ||
| 189 | if (optimizer.optimize_for_method_move && | ||
| 190 | opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { | ||
| 191 | if (next_opcode.has_value()) { | ||
| 192 | const auto next = *next_opcode; | ||
| 193 | if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod) { | ||
| 194 | return; | ||
| 195 | } | ||
| 196 | } | ||
| 197 | } | ||
| 198 | if (optimizer.zero_reg_skip && opcode.src_a == 0) { | ||
| 199 | if (opcode.immediate == 0) { | ||
| 200 | xor_(RESULT, RESULT); | ||
| 201 | } else { | ||
| 202 | mov(RESULT, opcode.immediate); | ||
| 203 | } | ||
| 204 | } else { | ||
| 205 | auto result = Compile_GetRegister(opcode.src_a, RESULT); | ||
| 206 | if (opcode.immediate >= 2) { | ||
| 207 | add(result, opcode.immediate); | ||
| 208 | } else if (opcode.immediate == 1) { | ||
| 209 | inc(result); | ||
| 210 | } else if (opcode.immediate < 0) { | ||
| 211 | sub(result, opcode.immediate * -1); | ||
| 212 | } | ||
| 213 | } | ||
| 214 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 215 | } | ||
| 216 | |||
| 217 | void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { | ||
| 218 | auto dst = Compile_GetRegister(opcode.src_a, RESULT); | ||
| 219 | auto src = Compile_GetRegister(opcode.src_b, eax); | ||
| 220 | |||
| 221 | if (opcode.bf_src_bit != 0 && opcode.bf_src_bit != 31) { | ||
| 222 | shr(src, opcode.bf_src_bit); | ||
| 223 | } else if (opcode.bf_src_bit == 31) { | ||
| 224 | xor_(src, src); | ||
| 225 | } | ||
| 226 | // Don't bother masking the whole register since we're using a 32-bit register | ||
| 227 | if (opcode.bf_size != 31 && opcode.bf_size != 0) { | ||
| 228 | and_(src, opcode.GetBitfieldMask()); | ||
| 229 | } else if (opcode.bf_size == 0) { | ||
| 230 | xor_(src, src); | ||
| 231 | } | ||
| 232 | if (opcode.bf_dst_bit != 31 && opcode.bf_dst_bit != 0) { | ||
| 233 | shl(src, opcode.bf_dst_bit); | ||
| 234 | } else if (opcode.bf_dst_bit == 31) { | ||
| 235 | xor_(src, src); | ||
| 236 | } | ||
| 237 | |||
| 238 | const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); | ||
| 239 | if (mask != 0xffffffff) { | ||
| 240 | and_(dst, mask); | ||
| 241 | } | ||
| 242 | or_(dst, src); | ||
| 243 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 244 | } | ||
| 245 | |||
| 246 | void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { | ||
| 247 | auto dst = Compile_GetRegister(opcode.src_a, eax); | ||
| 248 | auto src = Compile_GetRegister(opcode.src_b, RESULT); | ||
| 249 | |||
| 250 | shr(src, al); | ||
| 251 | if (opcode.bf_size != 0 && opcode.bf_size != 31) { | ||
| 252 | and_(src, opcode.GetBitfieldMask()); | ||
| 253 | } else if (opcode.bf_size == 0) { | ||
| 254 | xor_(src, src); | ||
| 255 | } | ||
| 256 | |||
| 257 | if (opcode.bf_dst_bit != 0 && opcode.bf_dst_bit != 31) { | ||
| 258 | shl(src, opcode.bf_dst_bit); | ||
| 259 | } else if (opcode.bf_dst_bit == 31) { | ||
| 260 | xor_(src, src); | ||
| 261 | } | ||
| 262 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 263 | } | ||
| 264 | |||
| 265 | void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { | ||
| 266 | auto dst = Compile_GetRegister(opcode.src_a, eax); | ||
| 267 | auto src = Compile_GetRegister(opcode.src_b, RESULT); | ||
| 268 | |||
| 269 | if (opcode.bf_src_bit != 0) { | ||
| 270 | shr(src, opcode.bf_src_bit); | ||
| 271 | } | ||
| 272 | |||
| 273 | if (opcode.bf_size != 31) { | ||
| 274 | and_(src, opcode.GetBitfieldMask()); | ||
| 275 | } | ||
| 276 | shl(src, al); | ||
| 277 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 278 | } | ||
| 279 | |||
| 280 | static u32 Read(Engines::Maxwell3D* maxwell3d, u32 method) { | ||
| 281 | return maxwell3d->GetRegisterValue(method); | ||
| 282 | } | ||
| 283 | |||
| 284 | static void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { | ||
| 285 | maxwell3d->CallMethodFromMME(method_address.address, value); | ||
| 286 | } | ||
| 287 | |||
| 288 | void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { | ||
| 289 | if (optimizer.zero_reg_skip && opcode.src_a == 0) { | ||
| 290 | if (opcode.immediate == 0) { | ||
| 291 | xor_(RESULT, RESULT); | ||
| 292 | } else { | ||
| 293 | mov(RESULT, opcode.immediate); | ||
| 294 | } | ||
| 295 | } else { | ||
| 296 | auto result = Compile_GetRegister(opcode.src_a, RESULT); | ||
| 297 | if (opcode.immediate >= 2) { | ||
| 298 | add(result, opcode.immediate); | ||
| 299 | } else if (opcode.immediate == 1) { | ||
| 300 | inc(result); | ||
| 301 | } else if (opcode.immediate < 0) { | ||
| 302 | sub(result, opcode.immediate * -1); | ||
| 303 | } | ||
| 304 | } | ||
| 305 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | ||
| 306 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | ||
| 307 | mov(Common::X64::ABI_PARAM2, RESULT); | ||
| 308 | Common::X64::CallFarFunction(*this, &Read); | ||
| 309 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | ||
| 310 | mov(RESULT, Common::X64::ABI_RETURN.cvt32()); | ||
| 311 | Compile_ProcessResult(opcode.result_operation, opcode.dst); | ||
| 312 | } | ||
| 313 | |||
| 314 | void Tegra::MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { | ||
| 315 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | ||
| 316 | mov(Common::X64::ABI_PARAM1, qword[STATE]); | ||
| 317 | mov(Common::X64::ABI_PARAM2, METHOD_ADDRESS); | ||
| 318 | mov(Common::X64::ABI_PARAM3, value); | ||
| 319 | Common::X64::CallFarFunction(*this, &Send); | ||
| 320 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, PersistentCallerSavedRegs(), 0); | ||
| 321 | |||
| 322 | Xbyak::Label dont_process{}; | ||
| 323 | // Get increment | ||
| 324 | test(METHOD_ADDRESS, 0x3f000); | ||
| 325 | // If zero, method address doesn't update | ||
| 326 | je(dont_process); | ||
| 327 | |||
| 328 | mov(ecx, METHOD_ADDRESS); | ||
| 329 | and_(METHOD_ADDRESS, 0xfff); | ||
| 330 | shr(ecx, 12); | ||
| 331 | and_(ecx, 0x3f); | ||
| 332 | lea(eax, ptr[rcx + METHOD_ADDRESS_64]); | ||
| 333 | sal(ecx, 12); | ||
| 334 | or_(eax, ecx); | ||
| 335 | |||
| 336 | mov(METHOD_ADDRESS, eax); | ||
| 337 | |||
| 338 | L(dont_process); | ||
| 339 | } | ||
| 340 | |||
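The bit twiddling above is the JIT counterpart of the interpreter's Send(): bits 0-11 of METHOD_ADDRESS hold the method register and bits 12-17 the auto-increment, and the test against 0x3f000 skips the update entirely when the increment is zero. The emitted sequence is equivalent to this plain C++ sketch:

    const u32 step = (method_address >> 12) & 0x3f; // increment field
    const u32 addr = method_address & 0xfff;        // current method register
    method_address = (addr + step) | (step << 12);  // advance, keep increment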
| 341 | void Tegra::MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { | ||
| 342 | ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); | ||
| 343 | const s32 jump_address = | ||
| 344 | static_cast<s32>(pc) + static_cast<s32>(opcode.GetBranchTarget() / sizeof(s32)); | ||
| 345 | |||
| 346 | Xbyak::Label end; | ||
| 347 | auto value = Compile_GetRegister(opcode.src_a, eax); | ||
| 348 | test(value, value); | ||
| 349 | if (optimizer.has_delayed_pc) { | ||
| 350 | switch (opcode.branch_condition) { | ||
| 351 | case Macro::BranchCondition::Zero: | ||
| 352 | jne(end, T_NEAR); | ||
| 353 | break; | ||
| 354 | case Macro::BranchCondition::NotZero: | ||
| 355 | je(end, T_NEAR); | ||
| 356 | break; | ||
| 357 | } | ||
| 358 | |||
| 359 | if (opcode.branch_annul) { | ||
| 360 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 361 | jmp(labels[jump_address], T_NEAR); | ||
| 362 | } else { | ||
| 363 | Xbyak::Label handle_post_exit{}; | ||
| 364 | Xbyak::Label skip{}; | ||
| 365 | jmp(skip, T_NEAR); | ||
| 366 | if (opcode.is_exit) { | ||
| 367 | L(handle_post_exit); | ||
| 368 | // Execute 1 instruction | ||
| 369 | mov(BRANCH_HOLDER, end_of_code); | ||
| 370 | // Jump to next instruction to skip delay slot check | ||
| 371 | jmp(labels[jump_address], T_NEAR); | ||
| 372 | } else { | ||
| 373 | L(handle_post_exit); | ||
| 374 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 375 | jmp(labels[jump_address], T_NEAR); | ||
| 376 | } | ||
| 377 | L(skip); | ||
| 378 | mov(BRANCH_HOLDER, handle_post_exit); | ||
| 379 | jmp(delay_skip[pc], T_NEAR); | ||
| 380 | } | ||
| 381 | } else { | ||
| 382 | switch (opcode.branch_condition) { | ||
| 383 | case Macro::BranchCondition::Zero: | ||
| 384 | je(labels[jump_address], T_NEAR); | ||
| 385 | break; | ||
| 386 | case Macro::BranchCondition::NotZero: | ||
| 387 | jne(labels[jump_address], T_NEAR); | ||
| 388 | break; | ||
| 389 | } | ||
| 390 | } | ||
| 391 | |||
| 392 | L(end); | ||
| 393 | } | ||
| 394 | |||
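BRANCH_HOLDER models the macro ISA's delayed branching; as an interpreter-style sketch (illustrative only, helper names invented):

    // Unless the annul bit is set, a taken branch executes the instruction
    // in the delay slot before control actually transfers.
    template <typename ExecuteOne>
    u32 NextPc(u32 pc, u32 target, bool taken, bool annul, ExecuteOne&& execute_one) {
        if (!taken) {
            return pc + 1;       // fall through
        }
        if (annul) {
            return target;       // annulled: skip the delay slot
        }
        execute_one(pc + 1);     // delay slot runs first
        return target;
    }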
| 395 | void Tegra::MacroJITx64Impl::Optimizer_ScanFlags() { | ||
| 396 | optimizer.can_skip_carry = true; | ||
| 397 | optimizer.has_delayed_pc = false; | ||
| 398 | for (auto raw_op : code) { | ||
| 399 | Macro::Opcode op{}; | ||
| 400 | op.raw = raw_op; | ||
| 401 | |||
| 402 | if (op.operation == Macro::Operation::ALU) { | ||
| 403 | // Scan for any ALU operations which actually use the carry flag; if none exist in | ||
| 404 | // the current code, we can skip emitting the carry flag handling operations | ||
| 405 | if (op.alu_operation == Macro::ALUOperation::AddWithCarry || | ||
| 406 | op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { | ||
| 407 | optimizer.can_skip_carry = false; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | |||
| 411 | if (op.operation == Macro::Operation::Branch) { | ||
| 412 | if (!op.branch_annul) { | ||
| 413 | optimizer.has_delayed_pc = true; | ||
| 414 | } | ||
| 415 | } | ||
| 416 | } | ||
| 417 | } | ||
| 418 | |||
| 419 | void MacroJITx64Impl::Compile() { | ||
| 420 | MICROPROFILE_SCOPE(MacroJitCompile); | ||
| 421 | bool keep_executing = true; | ||
| 422 | labels.fill(Xbyak::Label()); | ||
| 423 | |||
| 424 | Common::X64::ABI_PushRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | ||
| 425 | // JIT state | ||
| 426 | mov(STATE, Common::X64::ABI_PARAM1); | ||
| 427 | mov(PARAMETERS, qword[Common::X64::ABI_PARAM1 + | ||
| 428 | static_cast<Xbyak::uint32>(offsetof(JITState, parameters))]); | ||
| 429 | mov(REGISTERS, Common::X64::ABI_PARAM1); | ||
| 430 | add(REGISTERS, static_cast<Xbyak::uint32>(offsetof(JITState, registers))); | ||
| 431 | xor_(RESULT, RESULT); | ||
| 432 | xor_(METHOD_ADDRESS, METHOD_ADDRESS); | ||
| 433 | xor_(NEXT_PARAMETER, NEXT_PARAMETER); | ||
| 434 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 435 | |||
| 436 | mov(dword[REGISTERS + 4], Compile_FetchParameter()); | ||
| 437 | |||
| 438 | // Reads through register 0 always yield zero, so treat them as no-ops and skip the load | ||
| 439 | optimizer.zero_reg_skip = true; | ||
| 440 | |||
| 441 | // AddImmediate tends to be used as a NOP instruction; if we detect this, we can | ||
| 442 | // skip the entire code path and emit nothing | ||
| 443 | optimizer.skip_dummy_addimmediate = true; | ||
| 444 | |||
| 445 | // SMO tends to emit a lot of unnecessary method moves; we can mitigate this by only | ||
| 446 | // emitting one when our register isn't "dirty" | ||
| 447 | optimizer.optimize_for_method_move = true; | ||
| 448 | |||
| 449 | // Check to see if we can skip emitting certain instructions | ||
| 450 | Optimizer_ScanFlags(); | ||
| 451 | |||
| 452 | const u32 op_count = static_cast<u32>(code.size()); | ||
| 453 | for (u32 i = 0; i < op_count; i++) { | ||
| 454 | if (i < op_count - 1) { | ||
| 455 | pc = i + 1; | ||
| 456 | next_opcode = GetOpCode(); | ||
| 457 | } else { | ||
| 458 | next_opcode = {}; | ||
| 459 | } | ||
| 460 | pc = i; | ||
| 461 | Compile_NextInstruction(); | ||
| 462 | } | ||
| 463 | |||
| 464 | L(end_of_code); | ||
| 465 | |||
| 466 | Common::X64::ABI_PopRegistersAndAdjustStackGPS(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); | ||
| 467 | ret(); | ||
| 468 | ready(); | ||
| 469 | program = getCode<ProgramType>(); | ||
| 470 | } | ||
| 471 | |||
| 472 | bool MacroJITx64Impl::Compile_NextInstruction() { | ||
| 473 | const auto opcode = GetOpCode(); | ||
| 474 | if (labels[pc].getAddress()) { | ||
| 475 | return false; | ||
| 476 | } | ||
| 477 | |||
| 478 | L(labels[pc]); | ||
| 479 | |||
| 480 | switch (opcode.operation) { | ||
| 481 | case Macro::Operation::ALU: | ||
| 482 | Compile_ALU(opcode); | ||
| 483 | break; | ||
| 484 | case Macro::Operation::AddImmediate: | ||
| 485 | Compile_AddImmediate(opcode); | ||
| 486 | break; | ||
| 487 | case Macro::Operation::ExtractInsert: | ||
| 488 | Compile_ExtractInsert(opcode); | ||
| 489 | break; | ||
| 490 | case Macro::Operation::ExtractShiftLeftImmediate: | ||
| 491 | Compile_ExtractShiftLeftImmediate(opcode); | ||
| 492 | break; | ||
| 493 | case Macro::Operation::ExtractShiftLeftRegister: | ||
| 494 | Compile_ExtractShiftLeftRegister(opcode); | ||
| 495 | break; | ||
| 496 | case Macro::Operation::Read: | ||
| 497 | Compile_Read(opcode); | ||
| 498 | break; | ||
| 499 | case Macro::Operation::Branch: | ||
| 500 | Compile_Branch(opcode); | ||
| 501 | break; | ||
| 502 | default: | ||
| 503 | UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); | ||
| 504 | break; | ||
| 505 | } | ||
| 506 | |||
| 507 | if (optimizer.has_delayed_pc) { | ||
| 508 | if (opcode.is_exit) { | ||
| 509 | mov(rax, end_of_code); | ||
| 510 | test(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 511 | cmove(BRANCH_HOLDER, rax); | ||
| 512 | // Jump to next instruction to skip delay slot check | ||
| 513 | je(labels[pc + 1], T_NEAR); | ||
| 514 | } else { | ||
| 515 | // TODO(ogniK): Optimize delay slot branching | ||
| 516 | Xbyak::Label no_delay_slot{}; | ||
| 517 | test(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 518 | je(no_delay_slot, T_NEAR); | ||
| 519 | mov(rax, BRANCH_HOLDER); | ||
| 520 | xor_(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 521 | jmp(rax); | ||
| 522 | L(no_delay_slot); | ||
| 523 | } | ||
| 524 | L(delay_skip[pc]); | ||
| 525 | if (opcode.is_exit) { | ||
| 526 | return false; | ||
| 527 | } | ||
| 528 | } else { | ||
| 529 | test(BRANCH_HOLDER, BRANCH_HOLDER); | ||
| 530 | jne(end_of_code, T_NEAR); | ||
| 531 | if (opcode.is_exit) { | ||
| 532 | inc(BRANCH_HOLDER); | ||
| 533 | return false; | ||
| 534 | } | ||
| 535 | } | ||
| 536 | return true; | ||
| 537 | } | ||
| 538 | |||
| 539 | Xbyak::Reg32 Tegra::MacroJITx64Impl::Compile_FetchParameter() { | ||
| 540 | mov(eax, dword[PARAMETERS + NEXT_PARAMETER * sizeof(u32)]); | ||
| 541 | inc(NEXT_PARAMETER); | ||
| 542 | return eax; | ||
| 543 | } | ||
| 544 | |||
| 545 | Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { | ||
| 546 | if (index == 0) { | ||
| 547 | // Register 0 is always zero | ||
| 548 | xor_(dst, dst); | ||
| 549 | } else { | ||
| 550 | mov(dst, dword[REGISTERS + index * sizeof(u32)]); | ||
| 551 | } | ||
| 552 | |||
| 553 | return dst; | ||
| 554 | } | ||
| 555 | |||
| 556 | Xbyak::Reg64 Tegra::MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg64 dst) { | ||
| 557 | if (index == 0) { | ||
| 558 | // Register 0 is always zero | ||
| 559 | xor_(dst, dst); | ||
| 560 | } else { | ||
| 561 | mov(dst.cvt32(), dword[REGISTERS + index * sizeof(u32)]); // 32-bit load zero-extends | ||
| 562 | } | ||
| 563 | |||
| 564 | return dst; | ||
| 565 | } | ||
| 566 | |||
| 567 | void Tegra::MacroJITx64Impl::Compile_WriteCarry(Xbyak::Reg64 dst) { | ||
| 568 | // After a 32-bit add, the carry sits in bit 32 of dst; shr leaves ZF for setne | ||
| 569 | xor_(ecx, ecx); | ||
| 570 | shr(dst, 32); | ||
| 571 | setne(cl); | ||
| 572 | mov(dword[STATE + offsetof(JITState, carry_flag)], ecx); | ||
| 573 | } | ||
| 574 | |||
| 575 | void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { | ||
| 576 | auto SetRegister = [=](u32 reg, Xbyak::Reg32 result) { | ||
| 577 | // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero | ||
| 578 | // register. | ||
| 579 | if (reg == 0) { | ||
| 580 | return; | ||
| 581 | } | ||
| 582 | mov(dword[REGISTERS + reg * sizeof(u32)], result); | ||
| 583 | }; | ||
| 584 | auto SetMethodAddress = [=](Xbyak::Reg32 reg) { mov(METHOD_ADDRESS, reg); }; | ||
| 585 | |||
| 586 | switch (operation) { | ||
| 587 | case Macro::ResultOperation::IgnoreAndFetch: | ||
| 588 | SetRegister(reg, Compile_FetchParameter()); | ||
| 589 | break; | ||
| 590 | case Macro::ResultOperation::Move: | ||
| 591 | SetRegister(reg, RESULT); | ||
| 592 | break; | ||
| 593 | case Macro::ResultOperation::MoveAndSetMethod: | ||
| 594 | SetRegister(reg, RESULT); | ||
| 595 | SetMethodAddress(RESULT); | ||
| 596 | break; | ||
| 597 | case Macro::ResultOperation::FetchAndSend: | ||
| 598 | // Fetch parameter and send result. | ||
| 599 | SetRegister(reg, Compile_FetchParameter()); | ||
| 600 | Compile_Send(RESULT); | ||
| 601 | break; | ||
| 602 | case Macro::ResultOperation::MoveAndSend: | ||
| 603 | // Move and send result. | ||
| 604 | SetRegister(reg, RESULT); | ||
| 605 | Compile_Send(RESULT); | ||
| 606 | break; | ||
| 607 | case Macro::ResultOperation::FetchAndSetMethod: | ||
| 608 | // Fetch parameter and use result as Method Address. | ||
| 609 | SetRegister(reg, Compile_FetchParameter()); | ||
| 610 | SetMethodAddress(RESULT); | ||
| 611 | break; | ||
| 612 | case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: | ||
| 613 | // Move result and use as Method Address, then fetch and send parameter. | ||
| 614 | SetRegister(reg, RESULT); | ||
| 615 | SetMethodAddress(RESULT); | ||
| 616 | Compile_Send(Compile_FetchParameter()); | ||
| 617 | break; | ||
| 618 | case Macro::ResultOperation::MoveAndSetMethodSend: | ||
| 619 | // Move result and use as Method Address, then send bits 12:17 of result. | ||
| 620 | SetRegister(reg, RESULT); | ||
| 621 | SetMethodAddress(RESULT); | ||
| 622 | shr(RESULT, 12); | ||
| 623 | and_(RESULT, 0b111111); | ||
| 624 | Compile_Send(RESULT); | ||
| 625 | break; | ||
| 626 | default: | ||
| 627 | UNIMPLEMENTED_MSG("Unimplemented macro operation {}", static_cast<std::size_t>(operation)); | ||
| 628 | } | ||
| 629 | } | ||
| 630 | |||
| 631 | Macro::Opcode MacroJITx64Impl::GetOpCode() const { | ||
| 632 | ASSERT(pc < code.size()); | ||
| 633 | return {code[pc]}; | ||
| 634 | } | ||
| 635 | |||
| 636 | std::bitset<32> MacroJITx64Impl::PersistentCallerSavedRegs() const { | ||
| 637 | return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; | ||
| 638 | } | ||
| 639 | |||
| 640 | } // namespace Tegra | ||
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h new file mode 100644 index 000000000..71f738b9a --- /dev/null +++ b/src/video_core/macro/macro_jit_x64.h | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <bitset> | ||
| 9 | #include <xbyak.h> | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/x64/xbyak_abi.h" | ||
| 13 | #include "video_core/macro/macro.h" | ||
| 14 | |||
| 15 | namespace Tegra { | ||
| 16 | |||
| 17 | namespace Engines { | ||
| 18 | class Maxwell3D; | ||
| 19 | } | ||
| 20 | |||
| 21 | /// MAX_CODE_SIZE is chosen empirically; it is enough for the macros of currently booting games | ||
| 22 | constexpr size_t MAX_CODE_SIZE = 0x10000; | ||
| 23 | |||
| 24 | class MacroJITx64 final : public MacroEngine { | ||
| 25 | public: | ||
| 26 | explicit MacroJITx64(Engines::Maxwell3D& maxwell3d); | ||
| 27 | |||
| 28 | protected: | ||
| 29 | std::unique_ptr<CachedMacro> Compile(const std::vector<u32>& code) override; | ||
| 30 | |||
| 31 | private: | ||
| 32 | Engines::Maxwell3D& maxwell3d; | ||
| 33 | }; | ||
| 34 | |||
| 35 | class MacroJITx64Impl : public Xbyak::CodeGenerator, public CachedMacro { | ||
| 36 | public: | ||
| 37 | MacroJITx64Impl(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& code); | ||
| 38 | ~MacroJITx64Impl(); | ||
| 39 | |||
| 40 | void Execute(const std::vector<u32>& parameters, u32 method) override; | ||
| 41 | |||
| 42 | void Compile_ALU(Macro::Opcode opcode); | ||
| 43 | void Compile_AddImmediate(Macro::Opcode opcode); | ||
| 44 | void Compile_ExtractInsert(Macro::Opcode opcode); | ||
| 45 | void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); | ||
| 46 | void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); | ||
| 47 | void Compile_Read(Macro::Opcode opcode); | ||
| 48 | void Compile_Branch(Macro::Opcode opcode); | ||
| 49 | |||
| 50 | private: | ||
| 51 | void Optimizer_ScanFlags(); | ||
| 52 | |||
| 53 | void Compile(); | ||
| 54 | bool Compile_NextInstruction(); | ||
| 55 | |||
| 56 | Xbyak::Reg32 Compile_FetchParameter(); | ||
| 57 | Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); | ||
| 58 | Xbyak::Reg64 Compile_GetRegister(u32 index, Xbyak::Reg64 dst); | ||
| 59 | void Compile_WriteCarry(Xbyak::Reg64 dst); | ||
| 60 | |||
| 61 | void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); | ||
| 62 | void Compile_Send(Xbyak::Reg32 value); | ||
| 63 | |||
| 64 | Macro::Opcode GetOpCode() const; | ||
| 65 | std::bitset<32> PersistentCallerSavedRegs() const; | ||
| 66 | |||
| 67 | struct JITState { | ||
| 68 | Engines::Maxwell3D* maxwell3d{}; | ||
| 69 | std::array<u32, Macro::NUM_MACRO_REGISTERS> registers{}; | ||
| 70 | const u32* parameters{}; | ||
| 71 | u32 carry_flag{}; | ||
| 72 | }; | ||
| 73 | static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); | ||
| 74 | using ProgramType = void (*)(JITState*); | ||
| 75 | |||
| 76 | struct OptimizerState { | ||
| 77 | bool can_skip_carry{}; | ||
| 78 | bool has_delayed_pc{}; | ||
| 79 | bool zero_reg_skip{}; | ||
| 80 | bool skip_dummy_addimmediate{}; | ||
| 81 | bool optimize_for_method_move{}; | ||
| 82 | }; | ||
| 83 | OptimizerState optimizer{}; | ||
| 84 | |||
| 85 | std::optional<Macro::Opcode> next_opcode{}; | ||
| 86 | ProgramType program{nullptr}; | ||
| 87 | |||
| 88 | std::array<Xbyak::Label, MAX_CODE_SIZE> labels; | ||
| 89 | std::array<Xbyak::Label, MAX_CODE_SIZE> delay_skip; | ||
| 90 | Xbyak::Label end_of_code{}; | ||
| 91 | |||
| 92 | bool is_delay_slot{}; | ||
| 93 | u32 pc{}; | ||
| 94 | std::optional<u32> delayed_pc; | ||
| 95 | |||
| 96 | const std::vector<u32>& code; | ||
| 97 | Engines::Maxwell3D& maxwell3d; | ||
| 98 | }; | ||
| 99 | |||
| 100 | } // namespace Tegra | ||
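Given the declarations above, a host-side driver for a compiled macro would look roughly like this (a hypothetical sketch; the real Execute lives in the .cpp and may differ):

    void RunMacro(ProgramType program, Engines::Maxwell3D* maxwell3d, const u32* parameters) {
        JITState state{};
        state.maxwell3d = maxwell3d;   // must stay at offset 0: the JIT prologue
                                       // loads it with a single qword read of STATE
        state.parameters = parameters; // the PARAMETERS register points here
        program(&state);               // registers and carry_flag live in state
    }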
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp deleted file mode 100644 index 093b2cdf4..000000000 --- a/src/video_core/rasterizer_cache.cpp +++ /dev/null | |||
| @@ -1,7 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/rasterizer_cache.h" | ||
| 6 | |||
| 7 | RasterizerCacheObject::~RasterizerCacheObject() = default; | ||
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h deleted file mode 100644 index 22987751e..000000000 --- a/src/video_core/rasterizer_cache.h +++ /dev/null | |||
| @@ -1,197 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <mutex> | ||
| 8 | #include <set> | ||
| 9 | #include <unordered_map> | ||
| 10 | |||
| 11 | #include <boost/icl/interval_map.hpp> | ||
| 12 | #include <boost/range/iterator_range_core.hpp> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "core/settings.h" | ||
| 16 | #include "video_core/gpu.h" | ||
| 17 | #include "video_core/rasterizer_interface.h" | ||
| 18 | |||
| 19 | class RasterizerCacheObject { | ||
| 20 | public: | ||
| 21 | explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} | ||
| 22 | |||
| 23 | virtual ~RasterizerCacheObject(); | ||
| 24 | |||
| 25 | VAddr GetCpuAddr() const { | ||
| 26 | return cpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | /// Gets the size of the shader in guest memory, required for cache management | ||
| 30 | virtual std::size_t GetSizeInBytes() const = 0; | ||
| 31 | |||
| 32 | /// Sets whether the cached object should be considered registered | ||
| 33 | void SetIsRegistered(bool registered) { | ||
| 34 | is_registered = registered; | ||
| 35 | } | ||
| 36 | |||
| 37 | /// Returns true if the cached object is registered | ||
| 38 | bool IsRegistered() const { | ||
| 39 | return is_registered; | ||
| 40 | } | ||
| 41 | |||
| 42 | /// Returns true if the cached object is dirty | ||
| 43 | bool IsDirty() const { | ||
| 44 | return is_dirty; | ||
| 45 | } | ||
| 46 | |||
| 47 | /// Returns ticks from when this cached object was last modified | ||
| 48 | u64 GetLastModifiedTicks() const { | ||
| 49 | return last_modified_ticks; | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Marks an object as recently modified, used to specify whether it is clean or dirty | ||
| 53 | template <class T> | ||
| 54 | void MarkAsModified(bool dirty, T& cache) { | ||
| 55 | is_dirty = dirty; | ||
| 56 | last_modified_ticks = cache.GetModifiedTicks(); | ||
| 57 | } | ||
| 58 | |||
| 59 | private: | ||
| 60 | bool is_registered{}; ///< Whether the object is currently registered with the cache | ||
| 61 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) | ||
| 62 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing | ||
| 63 | VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space | ||
| 64 | }; | ||
| 65 | |||
| 66 | template <class T> | ||
| 67 | class RasterizerCache : NonCopyable { | ||
| 68 | friend class RasterizerCacheObject; | ||
| 69 | |||
| 70 | public: | ||
| 71 | explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | ||
| 72 | |||
| 73 | /// Write any cached resources overlapping the specified region back to memory | ||
| 74 | void FlushRegion(VAddr addr, std::size_t size) { | ||
| 75 | std::lock_guard lock{mutex}; | ||
| 76 | |||
| 77 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | ||
| 78 | for (auto& object : objects) { | ||
| 79 | FlushObject(object); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | /// Mark the specified region as being invalidated | ||
| 84 | void InvalidateRegion(VAddr addr, u64 size) { | ||
| 85 | std::lock_guard lock{mutex}; | ||
| 86 | |||
| 87 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | ||
| 88 | for (auto& object : objects) { | ||
| 89 | if (!object->IsRegistered()) { | ||
| 90 | // Skip duplicates | ||
| 91 | continue; | ||
| 92 | } | ||
| 93 | Unregister(object); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | /// Invalidates everything in the cache | ||
| 98 | void InvalidateAll() { | ||
| 99 | std::lock_guard lock{mutex}; | ||
| 100 | |||
| 101 | while (interval_cache.begin() != interval_cache.end()) { | ||
| 102 | Unregister(*interval_cache.begin()->second.begin()); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | protected: | ||
| 107 | /// Tries to get an object from the cache with the specified cache address | ||
| 108 | T TryGet(VAddr addr) const { | ||
| 109 | const auto iter = map_cache.find(addr); | ||
| 110 | if (iter != map_cache.end()) | ||
| 111 | return iter->second; | ||
| 112 | return nullptr; | ||
| 113 | } | ||
| 114 | |||
| 115 | /// Register an object into the cache | ||
| 116 | virtual void Register(const T& object) { | ||
| 117 | std::lock_guard lock{mutex}; | ||
| 118 | |||
| 119 | object->SetIsRegistered(true); | ||
| 120 | interval_cache.add({GetInterval(object), ObjectSet{object}}); | ||
| 121 | map_cache.insert({object->GetCpuAddr(), object}); | ||
| 122 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); | ||
| 123 | } | ||
| 124 | |||
| 125 | /// Unregisters an object from the cache | ||
| 126 | virtual void Unregister(const T& object) { | ||
| 127 | std::lock_guard lock{mutex}; | ||
| 128 | |||
| 129 | object->SetIsRegistered(false); | ||
| 130 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | ||
| 131 | const VAddr addr = object->GetCpuAddr(); | ||
| 132 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); | ||
| 133 | map_cache.erase(addr); | ||
| 134 | } | ||
| 135 | |||
| 136 | /// Returns a ticks counter used for tracking when cached objects were last modified | ||
| 137 | u64 GetModifiedTicks() { | ||
| 138 | std::lock_guard lock{mutex}; | ||
| 139 | |||
| 140 | return ++modified_ticks; | ||
| 141 | } | ||
| 142 | |||
| 143 | virtual void FlushObjectInner(const T& object) = 0; | ||
| 144 | |||
| 145 | /// Flushes the specified object, updating appropriate cache state as needed | ||
| 146 | void FlushObject(const T& object) { | ||
| 147 | std::lock_guard lock{mutex}; | ||
| 148 | |||
| 149 | if (!object->IsDirty()) { | ||
| 150 | return; | ||
| 151 | } | ||
| 152 | FlushObjectInner(object); | ||
| 153 | object->MarkAsModified(false, *this); | ||
| 154 | } | ||
| 155 | |||
| 156 | std::recursive_mutex mutex; | ||
| 157 | |||
| 158 | private: | ||
| 159 | /// Returns a list of cached objects from the specified memory region, ordered by access time | ||
| 160 | std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { | ||
| 161 | if (size == 0) { | ||
| 162 | return {}; | ||
| 163 | } | ||
| 164 | |||
| 165 | std::vector<T> objects; | ||
| 166 | const ObjectInterval interval{addr, addr + size}; | ||
| 167 | for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) { | ||
| 168 | for (auto& cached_object : pair.second) { | ||
| 169 | if (!cached_object) { | ||
| 170 | continue; | ||
| 171 | } | ||
| 172 | objects.push_back(cached_object); | ||
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool { | ||
| 177 | return a->GetLastModifiedTicks() < b->GetLastModifiedTicks(); | ||
| 178 | }); | ||
| 179 | |||
| 180 | return objects; | ||
| 181 | } | ||
| 182 | |||
| 183 | using ObjectSet = std::set<T>; | ||
| 184 | using ObjectCache = std::unordered_map<VAddr, T>; | ||
| 185 | using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; | ||
| 186 | using ObjectInterval = typename IntervalCache::interval_type; | ||
| 187 | |||
| 188 | static auto GetInterval(const T& object) { | ||
| 189 | return ObjectInterval::right_open(object->GetCpuAddr(), | ||
| 190 | object->GetCpuAddr() + object->GetSizeInBytes()); | ||
| 191 | } | ||
| 192 | |||
| 193 | ObjectCache map_cache; | ||
| 194 | IntervalCache interval_cache; ///< Cache of objects | ||
| 195 | u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing | ||
| 196 | VideoCore::RasterizerInterface& rasterizer; | ||
| 197 | }; | ||
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp new file mode 100644 index 000000000..1e96b0310 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp | |||
| @@ -0,0 +1,2074 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <string> | ||
| 9 | #include <string_view> | ||
| 10 | #include <utility> | ||
| 11 | #include <variant> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include "common/alignment.h" | ||
| 16 | #include "common/assert.h" | ||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 19 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 20 | #include "video_core/shader/registry.h" | ||
| 21 | #include "video_core/shader/shader_ir.h" | ||
| 22 | |||
| 23 | // Predicates in the decompiled code follow the convention that -1 means true and 0 means false. | ||
| 24 | // GLASM lacks booleans, so they have to be implemented as integers. | ||
| 25 | // Using -1 for true is useful because both CMP.S and NOT.U can negate it, and CMP.S can | ||
| 26 | // select between two values, since it evaluates -1 as true and 0 as false. | ||
| 27 | |||
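A quick numeric check of why all-ones works well as "true" (plain C++, not decompiler output):

    #include <cassert>
    #include <cstdint>

    int main() {
        const std::int32_t t = -1, f = 0;         // GLASM-side encodings of true/false
        assert(~t == f && ~f == t);               // NOT.U flips the two values
        const std::int32_t sel = (t < 0) ? 7 : 9; // CMP.S-style select: src0 < 0 picks src1
        assert(sel == 7);
        return 0;
    }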
| 28 | namespace OpenGL { | ||
| 29 | |||
| 30 | namespace { | ||
| 31 | |||
| 32 | using Tegra::Engines::ShaderType; | ||
| 33 | using Tegra::Shader::Attribute; | ||
| 34 | using Tegra::Shader::PixelImap; | ||
| 35 | using Tegra::Shader::Register; | ||
| 36 | using namespace VideoCommon::Shader; | ||
| 37 | using Operation = const OperationNode&; | ||
| 38 | |||
| 39 | constexpr std::array INTERNAL_FLAG_NAMES = {"ZERO", "SIGN", "CARRY", "OVERFLOW"}; | ||
| 40 | |||
| 41 | char Swizzle(std::size_t component) { | ||
| 42 | ASSERT(component < 4); | ||
| 43 | return component["xyzw"]; // equivalent to "xyzw"[component] | ||
| 44 | } | ||
| 45 | |||
| 46 | constexpr bool IsGenericAttribute(Attribute::Index index) { | ||
| 47 | return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; | ||
| 48 | } | ||
| 49 | |||
| 50 | u32 GetGenericAttributeIndex(Attribute::Index index) { | ||
| 51 | ASSERT(IsGenericAttribute(index)); | ||
| 52 | return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||
| 53 | } | ||
| 54 | |||
| 55 | std::string_view Modifiers(Operation operation) { | ||
| 56 | const auto meta = std::get_if<MetaArithmetic>(&operation.GetMeta()); | ||
| 57 | if (meta && meta->precise) { | ||
| 58 | return ".PREC"; | ||
| 59 | } | ||
| 60 | return ""; | ||
| 61 | } | ||
| 62 | |||
| 63 | std::string_view GetInputFlags(PixelImap attribute) { | ||
| 64 | switch (attribute) { | ||
| 65 | case PixelImap::Perspective: | ||
| 66 | return ""; | ||
| 67 | case PixelImap::Constant: | ||
| 68 | return "FLAT "; | ||
| 69 | case PixelImap::ScreenLinear: | ||
| 70 | return "NOPERSPECTIVE "; | ||
| 71 | case PixelImap::Unused: | ||
| 72 | break; | ||
| 73 | } | ||
| 74 | UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); | ||
| 75 | return {}; | ||
| 76 | } | ||
| 77 | |||
| 78 | std::string_view ImageType(Tegra::Shader::ImageType image_type) { | ||
| 79 | switch (image_type) { | ||
| 80 | case Tegra::Shader::ImageType::Texture1D: | ||
| 81 | return "1D"; | ||
| 82 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 83 | return "BUFFER"; | ||
| 84 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 85 | return "ARRAY1D"; | ||
| 86 | case Tegra::Shader::ImageType::Texture2D: | ||
| 87 | return "2D"; | ||
| 88 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 89 | return "ARRAY2D"; | ||
| 90 | case Tegra::Shader::ImageType::Texture3D: | ||
| 91 | return "3D"; | ||
| 92 | } | ||
| 93 | UNREACHABLE(); | ||
| 94 | return {}; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::string_view StackName(MetaStackClass stack) { | ||
| 98 | switch (stack) { | ||
| 99 | case MetaStackClass::Ssy: | ||
| 100 | return "SSY"; | ||
| 101 | case MetaStackClass::Pbk: | ||
| 102 | return "PBK"; | ||
| 103 | } | ||
| 104 | UNREACHABLE(); | ||
| 105 | return ""; | ||
| 106 | } | ||
| 107 | |||
| 108 | std::string_view PrimitiveDescription(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||
| 109 | switch (topology) { | ||
| 110 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||
| 111 | return "POINTS"; | ||
| 112 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||
| 113 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||
| 114 | return "LINES"; | ||
| 115 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||
| 116 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||
| 117 | return "LINES_ADJACENCY"; | ||
| 118 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||
| 119 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||
| 120 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||
| 121 | return "TRIANGLES"; | ||
| 122 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||
| 123 | case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||
| 124 | return "TRIANGLES_ADJACENCY"; | ||
| 125 | default: | ||
| 126 | UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||
| 127 | return "POINTS"; | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | std::string_view TopologyName(Tegra::Shader::OutputTopology topology) { | ||
| 132 | switch (topology) { | ||
| 133 | case Tegra::Shader::OutputTopology::PointList: | ||
| 134 | return "POINTS"; | ||
| 135 | case Tegra::Shader::OutputTopology::LineStrip: | ||
| 136 | return "LINE_STRIP"; | ||
| 137 | case Tegra::Shader::OutputTopology::TriangleStrip: | ||
| 138 | return "TRIANGLE_STRIP"; | ||
| 139 | default: | ||
| 140 | UNIMPLEMENTED_MSG("Unknown output topology: {}", static_cast<u32>(topology)); | ||
| 141 | return "points"; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | std::string_view StageInputName(ShaderType stage) { | ||
| 146 | switch (stage) { | ||
| 147 | case ShaderType::Vertex: | ||
| 148 | case ShaderType::Geometry: | ||
| 149 | return "vertex"; | ||
| 150 | case ShaderType::Fragment: | ||
| 151 | return "fragment"; | ||
| 152 | case ShaderType::Compute: | ||
| 153 | return "invocation"; | ||
| 154 | default: | ||
| 155 | UNREACHABLE(); | ||
| 156 | return ""; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | std::string TextureType(const MetaTexture& meta) { | ||
| 161 | if (meta.sampler.is_buffer) { | ||
| 162 | return "BUFFER"; | ||
| 163 | } | ||
| 164 | std::string type; | ||
| 165 | if (meta.sampler.is_shadow) { | ||
| 166 | type += "SHADOW"; | ||
| 167 | } | ||
| 168 | if (meta.sampler.is_array) { | ||
| 169 | type += "ARRAY"; | ||
| 170 | } | ||
| 171 | type += [&meta] { | ||
| 172 | switch (meta.sampler.type) { | ||
| 173 | case Tegra::Shader::TextureType::Texture1D: | ||
| 174 | return "1D"; | ||
| 175 | case Tegra::Shader::TextureType::Texture2D: | ||
| 176 | return "2D"; | ||
| 177 | case Tegra::Shader::TextureType::Texture3D: | ||
| 178 | return "3D"; | ||
| 179 | case Tegra::Shader::TextureType::TextureCube: | ||
| 180 | return "CUBE"; | ||
| 181 | } | ||
| 182 | UNREACHABLE(); | ||
| 183 | return "2D"; | ||
| 184 | }(); | ||
| 185 | return type; | ||
| 186 | } | ||
| 187 | |||
| 188 | std::string GlobalMemoryName(const GlobalMemoryBase& base) { | ||
| 189 | return fmt::format("gmem{}_{}", base.cbuf_index, base.cbuf_offset); | ||
| 190 | } | ||
| 191 | |||
| 192 | class ARBDecompiler final { | ||
| 193 | public: | ||
| 194 | explicit ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 195 | ShaderType stage, std::string_view identifier); | ||
| 196 | |||
| 197 | std::string Code() const { | ||
| 198 | return shader_source; | ||
| 199 | } | ||
| 200 | |||
| 201 | private: | ||
| 202 | void DeclareHeader(); | ||
| 203 | void DeclareVertex(); | ||
| 204 | void DeclareGeometry(); | ||
| 205 | void DeclareFragment(); | ||
| 206 | void DeclareCompute(); | ||
| 207 | void DeclareInputAttributes(); | ||
| 208 | void DeclareOutputAttributes(); | ||
| 209 | void DeclareLocalMemory(); | ||
| 210 | void DeclareGlobalMemory(); | ||
| 211 | void DeclareConstantBuffers(); | ||
| 212 | void DeclareRegisters(); | ||
| 213 | void DeclareTemporaries(); | ||
| 214 | void DeclarePredicates(); | ||
| 215 | void DeclareInternalFlags(); | ||
| 216 | |||
| 217 | void InitializeVariables(); | ||
| 218 | |||
| 219 | void DecompileAST(); | ||
| 220 | void DecompileBranchMode(); | ||
| 221 | |||
| 222 | void VisitAST(const ASTNode& node); | ||
| 223 | std::string VisitExpression(const Expr& node); | ||
| 224 | |||
| 225 | void VisitBlock(const NodeBlock& bb); | ||
| 226 | |||
| 227 | std::string Visit(const Node& node); | ||
| 228 | |||
| 229 | std::pair<std::string, std::size_t> BuildCoords(Operation); | ||
| 230 | std::string BuildAoffi(Operation); | ||
| 231 | void Exit(); | ||
| 232 | |||
| 233 | std::string Assign(Operation); | ||
| 234 | std::string Select(Operation); | ||
| 235 | std::string FClamp(Operation); | ||
| 236 | std::string FCastHalf0(Operation); | ||
| 237 | std::string FCastHalf1(Operation); | ||
| 238 | std::string FSqrt(Operation); | ||
| 239 | std::string FSwizzleAdd(Operation); | ||
| 240 | std::string HAdd2(Operation); | ||
| 241 | std::string HMul2(Operation); | ||
| 242 | std::string HFma2(Operation); | ||
| 243 | std::string HAbsolute(Operation); | ||
| 244 | std::string HNegate(Operation); | ||
| 245 | std::string HClamp(Operation); | ||
| 246 | std::string HCastFloat(Operation); | ||
| 247 | std::string HUnpack(Operation); | ||
| 248 | std::string HMergeF32(Operation); | ||
| 249 | std::string HMergeH0(Operation); | ||
| 250 | std::string HMergeH1(Operation); | ||
| 251 | std::string HPack2(Operation); | ||
| 252 | std::string LogicalAssign(Operation); | ||
| 253 | std::string LogicalPick2(Operation); | ||
| 254 | std::string LogicalAnd2(Operation); | ||
| 255 | std::string FloatOrdered(Operation); | ||
| 256 | std::string FloatUnordered(Operation); | ||
| 257 | std::string LogicalAddCarry(Operation); | ||
| 258 | std::string Texture(Operation); | ||
| 259 | std::string TextureGather(Operation); | ||
| 260 | std::string TextureQueryDimensions(Operation); | ||
| 261 | std::string TextureQueryLod(Operation); | ||
| 262 | std::string TexelFetch(Operation); | ||
| 263 | std::string TextureGradient(Operation); | ||
| 264 | std::string ImageLoad(Operation); | ||
| 265 | std::string ImageStore(Operation); | ||
| 266 | std::string Branch(Operation); | ||
| 267 | std::string BranchIndirect(Operation); | ||
| 268 | std::string PushFlowStack(Operation); | ||
| 269 | std::string PopFlowStack(Operation); | ||
| 270 | std::string Exit(Operation); | ||
| 271 | std::string Discard(Operation); | ||
| 272 | std::string EmitVertex(Operation); | ||
| 273 | std::string EndPrimitive(Operation); | ||
| 274 | std::string InvocationId(Operation); | ||
| 275 | std::string YNegate(Operation); | ||
| 276 | std::string ThreadId(Operation); | ||
| 277 | std::string ShuffleIndexed(Operation); | ||
| 278 | std::string Barrier(Operation); | ||
| 279 | std::string MemoryBarrierGroup(Operation); | ||
| 280 | std::string MemoryBarrierGlobal(Operation); | ||
| 281 | |||
| 282 | template <const std::string_view& op> | ||
| 283 | std::string Unary(Operation operation) { | ||
| 284 | const std::string temporary = AllocTemporary(); | ||
| 285 | AddLine("{}{} {}, {};", op, Modifiers(operation), temporary, Visit(operation[0])); | ||
| 286 | return temporary; | ||
| 287 | } | ||
| 288 | |||
| 289 | template <const std::string_view& op> | ||
| 290 | std::string Binary(Operation operation) { | ||
| 291 | const std::string temporary = AllocTemporary(); | ||
| 292 | AddLine("{}{} {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 293 | Visit(operation[1])); | ||
| 294 | return temporary; | ||
| 295 | } | ||
| 296 | |||
| 297 | template <const std::string_view& op> | ||
| 298 | std::string Trinary(Operation operation) { | ||
| 299 | const std::string temporary = AllocTemporary(); | ||
| 300 | AddLine("{}{} {}, {}, {}, {};", op, Modifiers(operation), temporary, Visit(operation[0]), | ||
| 301 | Visit(operation[1]), Visit(operation[2])); | ||
| 302 | return temporary; | ||
| 303 | } | ||
| 304 | |||
| 305 | template <const std::string_view& op, bool unordered> | ||
| 306 | std::string FloatComparison(Operation operation) { | ||
| 307 | const std::string temporary = AllocTemporary(); | ||
| 308 | AddLine("TRUNC.U.CC RC.x, {};", Binary<op>(operation)); | ||
| 309 | AddLine("MOV.S {}, 0;", temporary); | ||
| 310 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 311 | |||
| 312 | const std::string op_a = Visit(operation[0]); | ||
| 313 | const std::string op_b = Visit(operation[1]); | ||
| 314 | if constexpr (unordered) { | ||
| 315 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 316 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 317 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 318 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 319 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 320 | AddLine("MOV.S {} (NE.x), -1;", temporary); | ||
| 321 | } else if (op == SNE_F) { | ||
| 322 | AddLine("SNE.F RC.x, {}, {};", op_a, op_a); | ||
| 323 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 324 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 325 | AddLine("SNE.F RC.x, {}, {};", op_b, op_b); | ||
| 326 | AddLine("TRUNC.U.CC RC.x, RC.x;"); | ||
| 327 | AddLine("MOV.S {} (NE.x), 0;", temporary); | ||
| 328 | } | ||
| 329 | return temporary; | ||
| 330 | } | ||
| 331 | |||
| 332 | template <const std::string_view& op, bool is_nan> | ||
| 333 | std::string HalfComparison(Operation operation) { | ||
| 334 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 335 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 336 | const std::string op_a = Visit(operation[0]); | ||
| 337 | const std::string op_b = Visit(operation[1]); | ||
| 338 | AddLine("UP2H.F {}, {};", tmp1, op_a); | ||
| 339 | AddLine("UP2H.F {}, {};", tmp2, op_b); | ||
| 340 | AddLine("{} {}, {}, {};", op, tmp1, tmp1, tmp2); | ||
| 341 | AddLine("TRUNC.U.CC RC.xy, {};", tmp1); | ||
| 342 | AddLine("MOV.S {}.xy, {{0, 0, 0, 0}};", tmp1); | ||
| 343 | AddLine("MOV.S {}.x (NE.x), -1;", tmp1); | ||
| 344 | AddLine("MOV.S {}.y (NE.y), -1;", tmp1); | ||
| 345 | if constexpr (is_nan) { | ||
| 346 | AddLine("MOVC.F RC.x, {};", op_a); | ||
| 347 | AddLine("MOV.S {}.x (NAN.x), -1;", tmp1); | ||
| 348 | AddLine("MOVC.F RC.x, {};", op_b); | ||
| 349 | AddLine("MOV.S {}.y (NAN.x), -1;", tmp1); | ||
| 350 | } | ||
| 351 | return tmp1; | ||
| 352 | } | ||
| 353 | |||
| 354 | template <const std::string_view& op, const std::string_view& type> | ||
| 355 | std::string AtomicImage(Operation operation) { | ||
| 356 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 357 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 358 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 359 | const std::size_t num_values = meta.values.size(); | ||
| 360 | |||
| 361 | const std::string coord = AllocVectorTemporary(); | ||
| 362 | const std::string value = AllocVectorTemporary(); | ||
| 363 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 364 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 365 | } | ||
| 366 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 367 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 368 | } | ||
| 369 | |||
| 370 | const std::string result = coord; | ||
| 371 | AddLine("ATOMIM.{}.{} {}.x, {}, {}, image[{}], {};", op, type, result, value, coord, | ||
| 372 | image_id, ImageType(meta.image.type)); | ||
| 373 | return fmt::format("{}.x", result); | ||
| 374 | } | ||
| 375 | |||
| 376 | template <const std::string_view& op, const std::string_view& type> | ||
| 377 | std::string Atomic(Operation operation) { | ||
| 378 | const std::string temporary = AllocTemporary(); | ||
| 379 | std::string address; | ||
| 380 | std::string_view opname; | ||
| 381 | if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||
| 382 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 383 | Visit(gmem->GetBaseAddress())); | ||
| 384 | address = fmt::format("{}[{}]", GlobalMemoryName(gmem->GetDescriptor()), temporary); | ||
| 385 | opname = "ATOMB"; | ||
| 386 | } else if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||
| 387 | address = fmt::format("shared_mem[{}]", Visit(smem->GetAddress())); | ||
| 388 | opname = "ATOMS"; | ||
| 389 | } else { | ||
| 390 | UNREACHABLE(); | ||
| 391 | return "{0, 0, 0, 0}"; | ||
| 392 | } | ||
| 393 | AddLine("{}.{}.{} {}, {}, {};", opname, op, type, temporary, Visit(operation[1]), address); | ||
| 394 | return temporary; | ||
| 395 | } | ||
| 396 | |||
| 397 | template <char type> | ||
| 398 | std::string Negate(Operation operation) { | ||
| 399 | const std::string temporary = AllocTemporary(); | ||
| 400 | if constexpr (type == 'F') { | ||
| 401 | AddLine("MOV.F32 {}, -{};", temporary, Visit(operation[0])); | ||
| 402 | } else { | ||
| 403 | AddLine("MOV.{} {}, -{};", type, temporary, Visit(operation[0])); | ||
| 404 | } | ||
| 405 | return temporary; | ||
| 406 | } | ||
| 407 | |||
| 408 | template <char type> | ||
| 409 | std::string Absolute(Operation operation) { | ||
| 410 | const std::string temporary = AllocTemporary(); | ||
| 411 | AddLine("MOV.{} {}, |{}|;", type, temporary, Visit(operation[0])); | ||
| 412 | return temporary; | ||
| 413 | } | ||
| 414 | |||
| 415 | template <char type> | ||
| 416 | std::string BitfieldInsert(Operation operation) { | ||
| 417 | const std::string temporary = AllocVectorTemporary(); | ||
| 418 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[3])); | ||
| 419 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[2])); | ||
| 420 | AddLine("BFI.{} {}.x, {}, {}, {};", type, temporary, temporary, Visit(operation[1]), | ||
| 421 | Visit(operation[0])); | ||
| 422 | return fmt::format("{}.x", temporary); | ||
| 423 | } | ||
| 424 | |||
| 425 | template <char type> | ||
| 426 | std::string BitfieldExtract(Operation operation) { | ||
| 427 | const std::string temporary = AllocVectorTemporary(); | ||
| 428 | AddLine("MOV.{} {}.x, {};", type, temporary, Visit(operation[2])); | ||
| 429 | AddLine("MOV.{} {}.y, {};", type, temporary, Visit(operation[1])); | ||
| 430 | AddLine("BFE.{} {}.x, {}, {};", type, temporary, temporary, Visit(operation[0])); | ||
| 431 | return fmt::format("{}.x", temporary); | ||
| 432 | } | ||
| 433 | |||
| 434 | template <char swizzle> | ||
| 435 | std::string LocalInvocationId(Operation) { | ||
| 436 | return fmt::format("invocation.localid.{}", swizzle); | ||
| 437 | } | ||
| 438 | |||
| 439 | template <char swizzle> | ||
| 440 | std::string WorkGroupId(Operation) { | ||
| 441 | return fmt::format("invocation.groupid.{}", swizzle); | ||
| 442 | } | ||
| 443 | |||
| 444 | template <char c1, char c2> | ||
| 445 | std::string ThreadMask(Operation) { | ||
| 446 | return fmt::format("{}.thread{}{}mask", StageInputName(stage), c1, c2); | ||
| 447 | } | ||
| 448 | |||
| 449 | template <typename... Args> | ||
| 450 | void AddExpression(std::string_view text, Args&&... args) { | ||
| 451 | shader_source += fmt::format(text, std::forward<Args>(args)...); | ||
| 452 | } | ||
| 453 | |||
| 454 | template <typename... Args> | ||
| 455 | void AddLine(std::string_view text, Args&&... args) { | ||
| 456 | AddExpression(text, std::forward<Args>(args)...); | ||
| 457 | shader_source += '\n'; | ||
| 458 | } | ||
| 459 | |||
| 460 | std::string AllocTemporary() { | ||
| 461 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 462 | return fmt::format("T{}.x", num_temporaries++); | ||
| 463 | } | ||
| 464 | |||
| 465 | std::string AllocVectorTemporary() { | ||
| 466 | max_temporaries = std::max(max_temporaries, num_temporaries + 1); | ||
| 467 | return fmt::format("T{}", num_temporaries++); | ||
| 468 | } | ||
| 469 | |||
| 470 | void ResetTemporaries() noexcept { | ||
| 471 | num_temporaries = 0; | ||
| 472 | } | ||
| 473 | |||
| 474 | const Device& device; | ||
| 475 | const ShaderIR& ir; | ||
| 476 | const Registry& registry; | ||
| 477 | const ShaderType stage; | ||
| 478 | |||
| 479 | std::size_t num_temporaries = 0; | ||
| 480 | std::size_t max_temporaries = 0; | ||
| 481 | |||
| 482 | std::string shader_source; | ||
| 483 | |||
| 484 | static constexpr std::string_view ADD_F32 = "ADD.F32"; | ||
| 485 | static constexpr std::string_view ADD_S = "ADD.S"; | ||
| 486 | static constexpr std::string_view ADD_U = "ADD.U"; | ||
| 487 | static constexpr std::string_view MUL_F32 = "MUL.F32"; | ||
| 488 | static constexpr std::string_view MUL_S = "MUL.S"; | ||
| 489 | static constexpr std::string_view MUL_U = "MUL.U"; | ||
| 490 | static constexpr std::string_view DIV_F32 = "DIV.F32"; | ||
| 491 | static constexpr std::string_view DIV_S = "DIV.S"; | ||
| 492 | static constexpr std::string_view DIV_U = "DIV.U"; | ||
| 493 | static constexpr std::string_view MAD_F32 = "MAD.F32"; | ||
| 494 | static constexpr std::string_view RSQ_F32 = "RSQ.F32"; | ||
| 495 | static constexpr std::string_view COS_F32 = "COS.F32"; | ||
| 496 | static constexpr std::string_view SIN_F32 = "SIN.F32"; | ||
| 497 | static constexpr std::string_view EX2_F32 = "EX2.F32"; | ||
| 498 | static constexpr std::string_view LG2_F32 = "LG2.F32"; | ||
| 499 | static constexpr std::string_view SLT_F = "SLT.F32"; | ||
| 500 | static constexpr std::string_view SLT_S = "SLT.S"; | ||
| 501 | static constexpr std::string_view SLT_U = "SLT.U"; | ||
| 502 | static constexpr std::string_view SEQ_F = "SEQ.F32"; | ||
| 503 | static constexpr std::string_view SEQ_S = "SEQ.S"; | ||
| 504 | static constexpr std::string_view SEQ_U = "SEQ.U"; | ||
| 505 | static constexpr std::string_view SLE_F = "SLE.F32"; | ||
| 506 | static constexpr std::string_view SLE_S = "SLE.S"; | ||
| 507 | static constexpr std::string_view SLE_U = "SLE.U"; | ||
| 508 | static constexpr std::string_view SGT_F = "SGT.F32"; | ||
| 509 | static constexpr std::string_view SGT_S = "SGT.S"; | ||
| 510 | static constexpr std::string_view SGT_U = "SGT.U"; | ||
| 511 | static constexpr std::string_view SNE_F = "SNE.F32"; | ||
| 512 | static constexpr std::string_view SNE_S = "SNE.S"; | ||
| 513 | static constexpr std::string_view SNE_U = "SNE.U"; | ||
| 514 | static constexpr std::string_view SGE_F = "SGE.F32"; | ||
| 515 | static constexpr std::string_view SGE_S = "SGE.S"; | ||
| 516 | static constexpr std::string_view SGE_U = "SGE.U"; | ||
| 517 | static constexpr std::string_view AND_S = "AND.S"; | ||
| 518 | static constexpr std::string_view AND_U = "AND.U"; | ||
| 519 | static constexpr std::string_view TRUNC_F = "TRUNC.F"; | ||
| 520 | static constexpr std::string_view TRUNC_S = "TRUNC.S"; | ||
| 521 | static constexpr std::string_view TRUNC_U = "TRUNC.U"; | ||
| 522 | static constexpr std::string_view SHL_S = "SHL.S"; | ||
| 523 | static constexpr std::string_view SHL_U = "SHL.U"; | ||
| 524 | static constexpr std::string_view SHR_S = "SHR.S"; | ||
| 525 | static constexpr std::string_view SHR_U = "SHR.U"; | ||
| 526 | static constexpr std::string_view OR_S = "OR.S"; | ||
| 527 | static constexpr std::string_view OR_U = "OR.U"; | ||
| 528 | static constexpr std::string_view XOR_S = "XOR.S"; | ||
| 529 | static constexpr std::string_view XOR_U = "XOR.U"; | ||
| 530 | static constexpr std::string_view NOT_S = "NOT.S"; | ||
| 531 | static constexpr std::string_view NOT_U = "NOT.U"; | ||
| 532 | static constexpr std::string_view BTC_S = "BTC.S"; | ||
| 533 | static constexpr std::string_view BTC_U = "BTC.U"; | ||
| 534 | static constexpr std::string_view BTFM_S = "BTFM.S"; | ||
| 535 | static constexpr std::string_view BTFM_U = "BTFM.U"; | ||
| 536 | static constexpr std::string_view ROUND_F = "ROUND.F"; | ||
| 537 | static constexpr std::string_view CEIL_F = "CEIL.F"; | ||
| 538 | static constexpr std::string_view FLR_F = "FLR.F"; | ||
| 539 | static constexpr std::string_view I2F_S = "I2F.S"; | ||
| 540 | static constexpr std::string_view I2F_U = "I2F.U"; | ||
| 541 | static constexpr std::string_view MIN_F = "MIN.F"; | ||
| 542 | static constexpr std::string_view MIN_S = "MIN.S"; | ||
| 543 | static constexpr std::string_view MIN_U = "MIN.U"; | ||
| 544 | static constexpr std::string_view MAX_F = "MAX.F"; | ||
| 545 | static constexpr std::string_view MAX_S = "MAX.S"; | ||
| 546 | static constexpr std::string_view MAX_U = "MAX.U"; | ||
| 547 | static constexpr std::string_view MOV_U = "MOV.U"; | ||
| 548 | static constexpr std::string_view TGBALLOT_U = "TGBALLOT.U"; | ||
| 549 | static constexpr std::string_view TGALL_U = "TGALL.U"; | ||
| 550 | static constexpr std::string_view TGANY_U = "TGANY.U"; | ||
| 551 | static constexpr std::string_view TGEQ_U = "TGEQ.U"; | ||
| 552 | static constexpr std::string_view EXCH = "EXCH"; | ||
| 553 | static constexpr std::string_view ADD = "ADD"; | ||
| 554 | static constexpr std::string_view MIN = "MIN"; | ||
| 555 | static constexpr std::string_view MAX = "MAX"; | ||
| 556 | static constexpr std::string_view AND = "AND"; | ||
| 557 | static constexpr std::string_view OR = "OR"; | ||
| 558 | static constexpr std::string_view XOR = "XOR"; | ||
| 559 | static constexpr std::string_view U32 = "U32"; | ||
| 560 | static constexpr std::string_view S32 = "S32"; | ||
| 561 | |||
| 562 | static constexpr std::size_t NUM_ENTRIES = static_cast<std::size_t>(OperationCode::Amount); | ||
| 563 | using DecompilerType = std::string (ARBDecompiler::*)(Operation); | ||
| 564 | static constexpr std::array<DecompilerType, NUM_ENTRIES> OPERATION_DECOMPILERS = { | ||
| 565 | &ARBDecompiler::Assign, | ||
| 566 | |||
| 567 | &ARBDecompiler::Select, | ||
| 568 | |||
| 569 | &ARBDecompiler::Binary<ADD_F32>, | ||
| 570 | &ARBDecompiler::Binary<MUL_F32>, | ||
| 571 | &ARBDecompiler::Binary<DIV_F32>, | ||
| 572 | &ARBDecompiler::Trinary<MAD_F32>, | ||
| 573 | &ARBDecompiler::Negate<'F'>, | ||
| 574 | &ARBDecompiler::Absolute<'F'>, | ||
| 575 | &ARBDecompiler::FClamp, | ||
| 576 | &ARBDecompiler::FCastHalf0, | ||
| 577 | &ARBDecompiler::FCastHalf1, | ||
| 578 | &ARBDecompiler::Binary<MIN_F>, | ||
| 579 | &ARBDecompiler::Binary<MAX_F>, | ||
| 580 | &ARBDecompiler::Unary<COS_F32>, | ||
| 581 | &ARBDecompiler::Unary<SIN_F32>, | ||
| 582 | &ARBDecompiler::Unary<EX2_F32>, | ||
| 583 | &ARBDecompiler::Unary<LG2_F32>, | ||
| 584 | &ARBDecompiler::Unary<RSQ_F32>, | ||
| 585 | &ARBDecompiler::FSqrt, | ||
| 586 | &ARBDecompiler::Unary<ROUND_F>, | ||
| 587 | &ARBDecompiler::Unary<FLR_F>, | ||
| 588 | &ARBDecompiler::Unary<CEIL_F>, | ||
| 589 | &ARBDecompiler::Unary<TRUNC_F>, | ||
| 590 | &ARBDecompiler::Unary<I2F_S>, | ||
| 591 | &ARBDecompiler::Unary<I2F_U>, | ||
| 592 | &ARBDecompiler::FSwizzleAdd, | ||
| 593 | |||
| 594 | &ARBDecompiler::Binary<ADD_S>, | ||
| 595 | &ARBDecompiler::Binary<MUL_S>, | ||
| 596 | &ARBDecompiler::Binary<DIV_S>, | ||
| 597 | &ARBDecompiler::Negate<'S'>, | ||
| 598 | &ARBDecompiler::Absolute<'S'>, | ||
| 599 | &ARBDecompiler::Binary<MIN_S>, | ||
| 600 | &ARBDecompiler::Binary<MAX_S>, | ||
| 601 | |||
| 602 | &ARBDecompiler::Unary<TRUNC_S>, | ||
| 603 | &ARBDecompiler::Unary<MOV_U>, | ||
| 604 | &ARBDecompiler::Binary<SHL_S>, | ||
| 605 | &ARBDecompiler::Binary<SHR_U>, | ||
| 606 | &ARBDecompiler::Binary<SHR_S>, | ||
| 607 | &ARBDecompiler::Binary<AND_S>, | ||
| 608 | &ARBDecompiler::Binary<OR_S>, | ||
| 609 | &ARBDecompiler::Binary<XOR_S>, | ||
| 610 | &ARBDecompiler::Unary<NOT_S>, | ||
| 611 | &ARBDecompiler::BitfieldInsert<'S'>, | ||
| 612 | &ARBDecompiler::BitfieldExtract<'S'>, | ||
| 613 | &ARBDecompiler::Unary<BTC_S>, | ||
| 614 | &ARBDecompiler::Unary<BTFM_S>, | ||
| 615 | |||
| 616 | &ARBDecompiler::Binary<ADD_U>, | ||
| 617 | &ARBDecompiler::Binary<MUL_U>, | ||
| 618 | &ARBDecompiler::Binary<DIV_U>, | ||
| 619 | &ARBDecompiler::Binary<MIN_U>, | ||
| 620 | &ARBDecompiler::Binary<MAX_U>, | ||
| 621 | &ARBDecompiler::Unary<TRUNC_U>, | ||
| 622 | &ARBDecompiler::Unary<MOV_U>, | ||
| 623 | &ARBDecompiler::Binary<SHL_U>, | ||
| 624 | &ARBDecompiler::Binary<SHR_U>, | ||
| 625 | &ARBDecompiler::Binary<SHR_U>, | ||
| 626 | &ARBDecompiler::Binary<AND_U>, | ||
| 627 | &ARBDecompiler::Binary<OR_U>, | ||
| 628 | &ARBDecompiler::Binary<XOR_U>, | ||
| 629 | &ARBDecompiler::Unary<NOT_U>, | ||
| 630 | &ARBDecompiler::BitfieldInsert<'U'>, | ||
| 631 | &ARBDecompiler::BitfieldExtract<'U'>, | ||
| 632 | &ARBDecompiler::Unary<BTC_U>, | ||
| 633 | &ARBDecompiler::Unary<BTFM_U>, | ||
| 634 | |||
| 635 | &ARBDecompiler::HAdd2, | ||
| 636 | &ARBDecompiler::HMul2, | ||
| 637 | &ARBDecompiler::HFma2, | ||
| 638 | &ARBDecompiler::HAbsolute, | ||
| 639 | &ARBDecompiler::HNegate, | ||
| 640 | &ARBDecompiler::HClamp, | ||
| 641 | &ARBDecompiler::HCastFloat, | ||
| 642 | &ARBDecompiler::HUnpack, | ||
| 643 | &ARBDecompiler::HMergeF32, | ||
| 644 | &ARBDecompiler::HMergeH0, | ||
| 645 | &ARBDecompiler::HMergeH1, | ||
| 646 | &ARBDecompiler::HPack2, | ||
| 647 | |||
| 648 | &ARBDecompiler::LogicalAssign, | ||
| 649 | &ARBDecompiler::Binary<AND_U>, | ||
| 650 | &ARBDecompiler::Binary<OR_U>, | ||
| 651 | &ARBDecompiler::Binary<XOR_U>, | ||
| 652 | &ARBDecompiler::Unary<NOT_U>, | ||
| 653 | &ARBDecompiler::LogicalPick2, | ||
| 654 | &ARBDecompiler::LogicalAnd2, | ||
| 655 | |||
| 656 | &ARBDecompiler::FloatComparison<SLT_F, false>, | ||
| 657 | &ARBDecompiler::FloatComparison<SEQ_F, false>, | ||
| 658 | &ARBDecompiler::FloatComparison<SLE_F, false>, | ||
| 659 | &ARBDecompiler::FloatComparison<SGT_F, false>, | ||
| 660 | &ARBDecompiler::FloatComparison<SNE_F, false>, | ||
| 661 | &ARBDecompiler::FloatComparison<SGE_F, false>, | ||
| 662 | &ARBDecompiler::FloatOrdered, | ||
| 663 | &ARBDecompiler::FloatUnordered, | ||
| 664 | &ARBDecompiler::FloatComparison<SLT_F, true>, | ||
| 665 | &ARBDecompiler::FloatComparison<SEQ_F, true>, | ||
| 666 | &ARBDecompiler::FloatComparison<SLE_F, true>, | ||
| 667 | &ARBDecompiler::FloatComparison<SGT_F, true>, | ||
| 668 | &ARBDecompiler::FloatComparison<SNE_F, true>, | ||
| 669 | &ARBDecompiler::FloatComparison<SGE_F, true>, | ||
| 670 | |||
| 671 | &ARBDecompiler::Binary<SLT_S>, | ||
| 672 | &ARBDecompiler::Binary<SEQ_S>, | ||
| 673 | &ARBDecompiler::Binary<SLE_S>, | ||
| 674 | &ARBDecompiler::Binary<SGT_S>, | ||
| 675 | &ARBDecompiler::Binary<SNE_S>, | ||
| 676 | &ARBDecompiler::Binary<SGE_S>, | ||
| 677 | |||
| 678 | &ARBDecompiler::Binary<SLT_U>, | ||
| 679 | &ARBDecompiler::Binary<SEQ_U>, | ||
| 680 | &ARBDecompiler::Binary<SLE_U>, | ||
| 681 | &ARBDecompiler::Binary<SGT_U>, | ||
| 682 | &ARBDecompiler::Binary<SNE_U>, | ||
| 683 | &ARBDecompiler::Binary<SGE_U>, | ||
| 684 | |||
| 685 | &ARBDecompiler::LogicalAddCarry, | ||
| 686 | |||
| 687 | &ARBDecompiler::HalfComparison<SLT_F, false>, | ||
| 688 | &ARBDecompiler::HalfComparison<SEQ_F, false>, | ||
| 689 | &ARBDecompiler::HalfComparison<SLE_F, false>, | ||
| 690 | &ARBDecompiler::HalfComparison<SGT_F, false>, | ||
| 691 | &ARBDecompiler::HalfComparison<SNE_F, false>, | ||
| 692 | &ARBDecompiler::HalfComparison<SGE_F, false>, | ||
| 693 | &ARBDecompiler::HalfComparison<SLT_F, true>, | ||
| 694 | &ARBDecompiler::HalfComparison<SEQ_F, true>, | ||
| 695 | &ARBDecompiler::HalfComparison<SLE_F, true>, | ||
| 696 | &ARBDecompiler::HalfComparison<SGT_F, true>, | ||
| 697 | &ARBDecompiler::HalfComparison<SNE_F, true>, | ||
| 698 | &ARBDecompiler::HalfComparison<SGE_F, true>, | ||
| 699 | |||
| 700 | &ARBDecompiler::Texture, | ||
| 701 | &ARBDecompiler::Texture, | ||
| 702 | &ARBDecompiler::TextureGather, | ||
| 703 | &ARBDecompiler::TextureQueryDimensions, | ||
| 704 | &ARBDecompiler::TextureQueryLod, | ||
| 705 | &ARBDecompiler::TexelFetch, | ||
| 706 | &ARBDecompiler::TextureGradient, | ||
| 707 | |||
| 708 | &ARBDecompiler::ImageLoad, | ||
| 709 | &ARBDecompiler::ImageStore, | ||
| 710 | |||
| 711 | &ARBDecompiler::AtomicImage<ADD, U32>, | ||
| 712 | &ARBDecompiler::AtomicImage<AND, U32>, | ||
| 713 | &ARBDecompiler::AtomicImage<OR, U32>, | ||
| 714 | &ARBDecompiler::AtomicImage<XOR, U32>, | ||
| 715 | &ARBDecompiler::AtomicImage<EXCH, U32>, | ||
| 716 | |||
| 717 | &ARBDecompiler::Atomic<EXCH, U32>, | ||
| 718 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 719 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 720 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 721 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 722 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 723 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 724 | |||
| 725 | &ARBDecompiler::Atomic<EXCH, S32>, | ||
| 726 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 727 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 728 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 729 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 730 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 731 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 732 | |||
| 733 | &ARBDecompiler::Atomic<ADD, U32>, | ||
| 734 | &ARBDecompiler::Atomic<MIN, U32>, | ||
| 735 | &ARBDecompiler::Atomic<MAX, U32>, | ||
| 736 | &ARBDecompiler::Atomic<AND, U32>, | ||
| 737 | &ARBDecompiler::Atomic<OR, U32>, | ||
| 738 | &ARBDecompiler::Atomic<XOR, U32>, | ||
| 739 | |||
| 740 | &ARBDecompiler::Atomic<ADD, S32>, | ||
| 741 | &ARBDecompiler::Atomic<MIN, S32>, | ||
| 742 | &ARBDecompiler::Atomic<MAX, S32>, | ||
| 743 | &ARBDecompiler::Atomic<AND, S32>, | ||
| 744 | &ARBDecompiler::Atomic<OR, S32>, | ||
| 745 | &ARBDecompiler::Atomic<XOR, S32>, | ||
| 746 | |||
| 747 | &ARBDecompiler::Branch, | ||
| 748 | &ARBDecompiler::BranchIndirect, | ||
| 749 | &ARBDecompiler::PushFlowStack, | ||
| 750 | &ARBDecompiler::PopFlowStack, | ||
| 751 | &ARBDecompiler::Exit, | ||
| 752 | &ARBDecompiler::Discard, | ||
| 753 | |||
| 754 | &ARBDecompiler::EmitVertex, | ||
| 755 | &ARBDecompiler::EndPrimitive, | ||
| 756 | |||
| 757 | &ARBDecompiler::InvocationId, | ||
| 758 | &ARBDecompiler::YNegate, | ||
| 759 | &ARBDecompiler::LocalInvocationId<'x'>, | ||
| 760 | &ARBDecompiler::LocalInvocationId<'y'>, | ||
| 761 | &ARBDecompiler::LocalInvocationId<'z'>, | ||
| 762 | &ARBDecompiler::WorkGroupId<'x'>, | ||
| 763 | &ARBDecompiler::WorkGroupId<'y'>, | ||
| 764 | &ARBDecompiler::WorkGroupId<'z'>, | ||
| 765 | |||
| 766 | &ARBDecompiler::Unary<TGBALLOT_U>, | ||
| 767 | &ARBDecompiler::Unary<TGALL_U>, | ||
| 768 | &ARBDecompiler::Unary<TGANY_U>, | ||
| 769 | &ARBDecompiler::Unary<TGEQ_U>, | ||
| 770 | |||
| 771 | &ARBDecompiler::ThreadId, | ||
| 772 | &ARBDecompiler::ThreadMask<'e', 'q'>, | ||
| 773 | &ARBDecompiler::ThreadMask<'g', 'e'>, | ||
| 774 | &ARBDecompiler::ThreadMask<'g', 't'>, | ||
| 775 | &ARBDecompiler::ThreadMask<'l', 'e'>, | ||
| 776 | &ARBDecompiler::ThreadMask<'l', 't'>, | ||
| 777 | &ARBDecompiler::ShuffleIndexed, | ||
| 778 | |||
| 779 | &ARBDecompiler::Barrier, | ||
| 780 | &ARBDecompiler::MemoryBarrierGroup, | ||
| 781 | &ARBDecompiler::MemoryBarrierGlobal, | ||
| 782 | }; | ||
| 783 | }; | ||
| 784 | |||
| 785 | ARBDecompiler::ARBDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||
| 786 | ShaderType stage, std::string_view identifier) | ||
| 787 | : device{device}, ir{ir}, registry{registry}, stage{stage} { | ||
| 788 | AddLine("TEMP RC;"); | ||
| 789 | AddLine("TEMP FSWZA[4];"); | ||
| 790 | AddLine("TEMP FSWZB[4];"); | ||
| 791 | if (ir.IsDecompiled()) { | ||
| 792 | DecompileAST(); | ||
| 793 | } else { | ||
| 794 | DecompileBranchMode(); | ||
| 795 | } | ||
| 796 | AddLine("END"); | ||
| 797 | |||
| 798 | const std::string code = std::move(shader_source); | ||
| 799 | DeclareHeader(); | ||
| 800 | DeclareVertex(); | ||
| 801 | DeclareGeometry(); | ||
| 802 | DeclareFragment(); | ||
| 803 | DeclareCompute(); | ||
| 804 | DeclareInputAttributes(); | ||
| 805 | DeclareOutputAttributes(); | ||
| 806 | DeclareLocalMemory(); | ||
| 807 | DeclareGlobalMemory(); | ||
| 808 | DeclareConstantBuffers(); | ||
| 809 | DeclareRegisters(); | ||
| 810 | DeclareTemporaries(); | ||
| 811 | DeclarePredicates(); | ||
| 812 | DeclareInternalFlags(); | ||
| 813 | |||
| 814 | shader_source += code; | ||
| 815 | } | ||
| 816 | |||
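Editor's note: the constructor above uses a two-pass trick — the shader body is generated first (so register, temporary, and predicate usage is known), then moved aside while the declarations are emitted, and finally appended after them. A minimal hedged sketch of the pattern; `TwoPassBuilder` and its members are hypothetical names, not the shipped API:

```cpp
#include <string>
#include <utility>

// Minimal sketch of the two-pass emission used by the constructor above.
class TwoPassBuilder {
public:
    std::string Finish() {
        std::string body = std::move(source); // body was emitted first...
        source.clear();
        EmitDeclarations(); // ...so declarations can reflect actual usage
        source += body;     // the body is appended after the declarations
        return source;
    }

private:
    void EmitDeclarations() { source += "TEMP R0;\n"; }
    std::string source;
};
```

Note that the sketch clears the moved-from buffer before reusing it, which the code above relies on implicitly after `std::move(shader_source)`.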
| 817 | std::string_view HeaderStageName(ShaderType stage) { | ||
| 818 | switch (stage) { | ||
| 819 | case ShaderType::Vertex: | ||
| 820 | return "vp"; | ||
| 821 | case ShaderType::Geometry: | ||
| 822 | return "gp"; | ||
| 823 | case ShaderType::Fragment: | ||
| 824 | return "fp"; | ||
| 825 | case ShaderType::Compute: | ||
| 826 | return "cp"; | ||
| 827 | default: | ||
| 828 | UNREACHABLE(); | ||
| 829 | return ""; | ||
| 830 | } | ||
| 831 | } | ||
| 832 | |||
| 833 | void ARBDecompiler::DeclareHeader() { | ||
| 834 | AddLine("!!NV{}5.0", HeaderStageName(stage)); | ||
| 835 | // Enabling this allows us to cheat on some instructions like TXL with SHADOWARRAY2D | ||
| 836 | AddLine("OPTION NV_internal;"); | ||
| 837 | AddLine("OPTION NV_gpu_program_fp64;"); | ||
| 838 | AddLine("OPTION NV_shader_storage_buffer;"); | ||
| 839 | AddLine("OPTION NV_shader_thread_group;"); | ||
| 840 | if (ir.UsesWarps() && device.HasWarpIntrinsics()) { | ||
| 841 | AddLine("OPTION NV_shader_thread_shuffle;"); | ||
| 842 | } | ||
| 843 | if (stage == ShaderType::Vertex) { | ||
| 844 | if (device.HasNvViewportArray2()) { | ||
| 845 | AddLine("OPTION NV_viewport_array2;"); | ||
| 846 | } | ||
| 847 | } | ||
| 848 | if (stage == ShaderType::Fragment) { | ||
| 849 | AddLine("OPTION ARB_draw_buffers;"); | ||
| 850 | } | ||
| 851 | if (device.HasImageLoadFormatted()) { | ||
| 852 | AddLine("OPTION EXT_shader_image_load_formatted;"); | ||
| 853 | } | ||
| 854 | } | ||
| 855 | |||
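For reference, the header emitted for a typical fragment shader has roughly this shape (illustrative only; the exact OPTION lines depend on the device's reported extensions and the checks above):

```
!!NVfp5.0
OPTION NV_internal;
OPTION NV_gpu_program_fp64;
OPTION NV_shader_storage_buffer;
OPTION NV_shader_thread_group;
OPTION ARB_draw_buffers;
OPTION EXT_shader_image_load_formatted;
```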
| 856 | void ARBDecompiler::DeclareVertex() { | ||
| 857 | if (stage != ShaderType::Vertex) { | ||
| 858 | return; | ||
| 859 | } | ||
| 860 | AddLine("OUTPUT result_clip[] = {{ result.clip[0..7] }};"); | ||
| 861 | } | ||
| 862 | |||
| 863 | void ARBDecompiler::DeclareGeometry() { | ||
| 864 | if (stage != ShaderType::Geometry) { | ||
| 865 | return; | ||
| 866 | } | ||
| 867 | const auto& info = registry.GetGraphicsInfo(); | ||
| 868 | const auto& header = ir.GetHeader(); | ||
| 869 | AddLine("PRIMITIVE_IN {};", PrimitiveDescription(info.primitive_topology)); | ||
| 870 | AddLine("PRIMITIVE_OUT {};", TopologyName(header.common3.output_topology)); | ||
| 871 | AddLine("VERTICES_OUT {};", header.common4.max_output_vertices.Value()); | ||
| 872 | AddLine("ATTRIB vertex_position = vertex.position;"); | ||
| 873 | } | ||
| 874 | |||
| 875 | void ARBDecompiler::DeclareFragment() { | ||
| 876 | if (stage != ShaderType::Fragment) { | ||
| 877 | return; | ||
| 878 | } | ||
| 879 | AddLine("OUTPUT result_color7 = result.color[7];"); | ||
| 880 | AddLine("OUTPUT result_color6 = result.color[6];"); | ||
| 881 | AddLine("OUTPUT result_color5 = result.color[5];"); | ||
| 882 | AddLine("OUTPUT result_color4 = result.color[4];"); | ||
| 883 | AddLine("OUTPUT result_color3 = result.color[3];"); | ||
| 884 | AddLine("OUTPUT result_color2 = result.color[2];"); | ||
| 885 | AddLine("OUTPUT result_color1 = result.color[1];"); | ||
| 886 | AddLine("OUTPUT result_color0 = result.color;"); | ||
| 887 | } | ||
| 888 | |||
| 889 | void ARBDecompiler::DeclareCompute() { | ||
| 890 | if (stage != ShaderType::Compute) { | ||
| 891 | return; | ||
| 892 | } | ||
| 893 | const ComputeInfo& info = registry.GetComputeInfo(); | ||
| 894 | AddLine("GROUP_SIZE {} {} {};", info.workgroup_size[0], info.workgroup_size[1], | ||
| 895 | info.workgroup_size[2]); | ||
| 896 | if (info.shared_memory_size_in_words > 0) { | ||
| 897 | const u32 size_in_bytes = info.shared_memory_size_in_words * 4; | ||
| 898 | AddLine("SHARED_MEMORY {};", size_in_bytes); | ||
| 899 | AddLine("SHARED shared_mem[] = {{program.sharedmem}};"); | ||
| 900 | } | ||
| 901 | } | ||
| 902 | |||
| 903 | void ARBDecompiler::DeclareInputAttributes() { | ||
| 904 | if (stage == ShaderType::Compute) { | ||
| 905 | return; | ||
| 906 | } | ||
| 907 | const std::string_view stage_name = StageInputName(stage); | ||
| 908 | for (const auto attribute : ir.GetInputAttributes()) { | ||
| 909 | if (!IsGenericAttribute(attribute)) { | ||
| 910 | continue; | ||
| 911 | } | ||
| 912 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 913 | |||
| 914 | std::string_view suffix; | ||
| 915 | if (stage == ShaderType::Fragment) { | ||
| 916 | const auto input_mode{ir.GetHeader().ps.GetPixelImap(index)}; | ||
| 917 | if (input_mode == PixelImap::Unused) { | ||
| 918 | continue; | ||
| 919 | } | ||
| 920 | suffix = GetInputFlags(input_mode); | ||
| 921 | } | ||
| 922 | AddLine("{}ATTRIB in_attr{}[] = {{ {}.attrib[{}..{}] }};", suffix, index, stage_name, index, | ||
| 923 | index); | ||
| 924 | } | ||
| 925 | } | ||
| 926 | |||
| 927 | void ARBDecompiler::DeclareOutputAttributes() { | ||
| 928 | if (stage == ShaderType::Compute) { | ||
| 929 | return; | ||
| 930 | } | ||
| 931 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 932 | if (!IsGenericAttribute(attribute)) { | ||
| 933 | continue; | ||
| 934 | } | ||
| 935 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 936 | AddLine("OUTPUT out_attr{}[] = {{ result.attrib[{}..{}] }};", index, index, index); | ||
| 937 | } | ||
| 938 | } | ||
| 939 | |||
| 940 | void ARBDecompiler::DeclareLocalMemory() { | ||
| 941 | u64 size = 0; | ||
| 942 | if (stage == ShaderType::Compute) { | ||
| 943 | size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||
| 944 | } else { | ||
| 945 | size = ir.GetHeader().GetLocalMemorySize(); | ||
| 946 | } | ||
| 947 | if (size == 0) { | ||
| 948 | return; | ||
| 949 | } | ||
| 950 | const u64 element_count = Common::AlignUp(size, 4) / 4; | ||
| 951 | AddLine("TEMP lmem[{}];", element_count); | ||
| 952 | } | ||
| 953 | |||
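The element count rounds the byte size up to whole 32-bit words. A hedged sketch of the `Common::AlignUp` call used above, assuming a power-of-two alignment:

```cpp
#include <cstdint>

constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) & ~(align - 1);
}

// 9 bytes of local memory round up to 12, i.e. three 32-bit lmem entries.
static_assert(AlignUp(9, 4) / 4 == 3);
```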
| 954 | void ARBDecompiler::DeclareGlobalMemory() { | ||
| 955 | u32 binding = 0; // device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 956 | for (const auto& pair : ir.GetGlobalMemory()) { | ||
| 957 | const auto& base = pair.first; | ||
| 958 | AddLine("STORAGE {}[] = {{ program.storage[{}] }};", GlobalMemoryName(base), binding); | ||
| 959 | ++binding; | ||
| 960 | } | ||
| 961 | } | ||
| 962 | |||
| 963 | void ARBDecompiler::DeclareConstantBuffers() { | ||
| 964 | u32 binding = 0; | ||
| 965 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 966 | AddLine("CBUFFER cbuf{}[] = {{ program.buffer[{}] }};", cbuf.first, binding); | ||
| 967 | ++binding; | ||
| 968 | } | ||
| 969 | } | ||
| 970 | |||
| 971 | void ARBDecompiler::DeclareRegisters() { | ||
| 972 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 973 | AddLine("TEMP R{};", gpr); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | void ARBDecompiler::DeclareTemporaries() { | ||
| 978 | for (std::size_t i = 0; i < max_temporaries; ++i) { | ||
| 979 | AddLine("TEMP T{};", i); | ||
| 980 | } | ||
| 981 | } | ||
| 982 | |||
| 983 | void ARBDecompiler::DeclarePredicates() { | ||
| 984 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 985 | AddLine("TEMP P{};", static_cast<u64>(pred)); | ||
| 986 | } | ||
| 987 | } | ||
| 988 | |||
| 989 | void ARBDecompiler::DeclareInternalFlags() { | ||
| 990 | for (const char* name : INTERNAL_FLAG_NAMES) { | ||
| 991 | AddLine("TEMP {};", name); | ||
| 992 | } | ||
| 993 | } | ||
| 994 | |||
| 995 | void ARBDecompiler::InitializeVariables() { | ||
| 996 | AddLine("MOV.F32 FSWZA[0], -1;"); | ||
| 997 | AddLine("MOV.F32 FSWZA[1], 1;"); | ||
| 998 | AddLine("MOV.F32 FSWZA[2], -1;"); | ||
| 999 | AddLine("MOV.F32 FSWZA[3], 0;"); | ||
| 1000 | AddLine("MOV.F32 FSWZB[0], -1;"); | ||
| 1001 | AddLine("MOV.F32 FSWZB[1], -1;"); | ||
| 1002 | AddLine("MOV.F32 FSWZB[2], 1;"); | ||
| 1003 | AddLine("MOV.F32 FSWZB[3], -1;"); | ||
| 1004 | |||
| 1005 | if (stage == ShaderType::Vertex || stage == ShaderType::Geometry) { | ||
| 1006 | AddLine("MOV.F result.position, {{0, 0, 0, 1}};"); | ||
| 1007 | } | ||
| 1008 | for (const auto attribute : ir.GetOutputAttributes()) { | ||
| 1009 | if (!IsGenericAttribute(attribute)) { | ||
| 1010 | continue; | ||
| 1011 | } | ||
| 1012 | const u32 index = GetGenericAttributeIndex(attribute); | ||
| 1013 | AddLine("MOV.F result.attrib[{}], {{0, 0, 0, 1}};", index); | ||
| 1014 | } | ||
| 1015 | for (const u32 gpr : ir.GetRegisters()) { | ||
| 1016 | AddLine("MOV.F R{}, {{0, 0, 0, 0}};", gpr); | ||
| 1017 | } | ||
| 1018 | for (const Tegra::Shader::Pred pred : ir.GetPredicates()) { | ||
| 1019 | AddLine("MOV.U P{}, {{0, 0, 0, 0}};", static_cast<u64>(pred)); | ||
| 1020 | } | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | void ARBDecompiler::DecompileAST() { | ||
| 1024 | const u32 num_flow_variables = ir.GetASTNumVariables(); | ||
| 1025 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1026 | AddLine("TEMP F{};", i); | ||
| 1027 | } | ||
| 1028 | for (u32 i = 0; i < num_flow_variables; ++i) { | ||
| 1029 | AddLine("MOV.U F{}, {{0, 0, 0, 0}};", i); | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | InitializeVariables(); | ||
| 1033 | |||
| 1034 | VisitAST(ir.GetASTProgram()); | ||
| 1035 | } | ||
| 1036 | |||
| 1037 | void ARBDecompiler::DecompileBranchMode() { | ||
| 1038 | static constexpr u32 FLOW_STACK_SIZE = 20; | ||
| 1039 | if (!ir.IsFlowStackDisabled()) { | ||
| 1040 | AddLine("TEMP SSY[{}];", FLOW_STACK_SIZE); | ||
| 1041 | AddLine("TEMP PBK[{}];", FLOW_STACK_SIZE); | ||
| 1042 | AddLine("TEMP SSY_TOP;"); | ||
| 1043 | AddLine("TEMP PBK_TOP;"); | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | AddLine("TEMP PC;"); | ||
| 1047 | |||
| 1048 | if (!ir.IsFlowStackDisabled()) { | ||
| 1049 | AddLine("MOV.U SSY_TOP.x, 0;"); | ||
| 1050 | AddLine("MOV.U PBK_TOP.x, 0;"); | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | InitializeVariables(); | ||
| 1054 | |||
| 1055 | const auto basic_block_end = ir.GetBasicBlocks().end(); | ||
| 1056 | auto basic_block_it = ir.GetBasicBlocks().begin(); | ||
| 1057 | const u32 first_address = basic_block_it->first; | ||
| 1058 | AddLine("MOV.U PC.x, {};", first_address); | ||
| 1059 | |||
| 1060 | AddLine("REP;"); | ||
| 1061 | |||
| 1062 | std::size_t num_blocks = 0; | ||
| 1063 | while (basic_block_it != basic_block_end) { | ||
| 1064 | const auto& [address, bb] = *basic_block_it; | ||
| 1065 | ++num_blocks; | ||
| 1066 | |||
| 1067 | AddLine("SEQ.S.CC RC.x, PC.x, {};", address); | ||
| 1068 | AddLine("IF NE.x;"); | ||
| 1069 | |||
| 1070 | VisitBlock(bb); | ||
| 1071 | |||
| 1072 | ++basic_block_it; | ||
| 1073 | |||
| 1074 | if (basic_block_it != basic_block_end) { | ||
| 1075 | const auto op = std::get_if<OperationNode>(&*bb[bb.size() - 1]); | ||
| 1076 | if (!op || op->GetCode() != OperationCode::Branch) { | ||
| 1077 | const u32 next_address = basic_block_it->first; | ||
| 1078 | AddLine("MOV.U PC.x, {};", next_address); | ||
| 1079 | AddLine("CONT;"); | ||
| 1080 | } | ||
| 1081 | } | ||
| 1082 | |||
| 1083 | AddLine("ELSE;"); | ||
| 1084 | } | ||
| 1085 | AddLine("RET;"); | ||
| 1086 | while (num_blocks--) { | ||
| 1087 | AddLine("ENDIF;"); | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | AddLine("ENDREP;"); | ||
| 1091 | } | ||
| 1092 | |||
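Branch mode emulates a program counter in software: a single REP loop, with each basic block guarded by a PC comparison. For two blocks at addresses 0x0 and 0x8 the emitted code has roughly this shape (illustrative; `#` starts a GLASM comment):

```
MOV.U PC.x, 0;            # entry point: address of the first basic block
REP;
SEQ.S.CC RC.x, PC.x, 0;   # PC == 0x0?
IF NE.x;
# ...block 0...
MOV.U PC.x, 8;            # block does not end in a branch: fall through
CONT;
ELSE;
SEQ.S.CC RC.x, PC.x, 8;   # PC == 0x8?
IF NE.x;
# ...block 1...
ELSE;
RET;                      # no block matched
ENDIF;
ENDIF;
ENDREP;
```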
| 1093 | void ARBDecompiler::VisitAST(const ASTNode& node) { | ||
| 1094 | if (const auto ast = std::get_if<ASTProgram>(&*node->GetInnerData())) { | ||
| 1095 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1096 | VisitAST(current); | ||
| 1097 | } | ||
| 1098 | } else if (const auto ast = std::get_if<ASTIfThen>(&*node->GetInnerData())) { | ||
| 1099 | const std::string condition = VisitExpression(ast->condition); | ||
| 1100 | ResetTemporaries(); | ||
| 1101 | |||
| 1102 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1103 | AddLine("IF NE.x;"); | ||
| 1104 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1105 | VisitAST(current); | ||
| 1106 | } | ||
| 1107 | AddLine("ENDIF;"); | ||
| 1108 | } else if (const auto ast = std::get_if<ASTIfElse>(&*node->GetInnerData())) { | ||
| 1109 | AddLine("ELSE;"); | ||
| 1110 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1111 | VisitAST(current); | ||
| 1112 | } | ||
| 1113 | } else if (const auto ast = std::get_if<ASTBlockDecoded>(&*node->GetInnerData())) { | ||
| 1114 | VisitBlock(ast->nodes); | ||
| 1115 | } else if (const auto ast = std::get_if<ASTVarSet>(&*node->GetInnerData())) { | ||
| 1116 | AddLine("MOV.U F{}, {};", ast->index, VisitExpression(ast->condition)); | ||
| 1117 | ResetTemporaries(); | ||
| 1118 | } else if (const auto ast = std::get_if<ASTDoWhile>(&*node->GetInnerData())) { | ||
| 1119 | AddLine("REP;"); | ||
| 1120 | for (ASTNode current = ast->nodes.GetFirst(); current; current = current->GetNext()) { | ||
| 1121 | VisitAST(current); | ||
| 1122 | } | ||
| 1123 | const std::string condition = VisitExpression(ast->condition); | ||
| 1124 | AddLine("MOVC.U RC.x, {};", condition); | ||
| 1125 | AddLine("BRK (NE.x);"); | ||
| 1126 | ResetTemporaries(); | ||
| 1127 | AddLine("ENDREP;"); | ||
| 1128 | } else if (const auto ast = std::get_if<ASTReturn>(&*node->GetInnerData())) { | ||
| 1129 | const bool is_true = ExprIsTrue(ast->condition); | ||
| 1130 | if (!is_true) { | ||
| 1131 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1132 | AddLine("IF NE.x;"); | ||
| 1133 | ResetTemporaries(); | ||
| 1134 | } | ||
| 1135 | if (ast->kills) { | ||
| 1136 | AddLine("KIL TR;"); | ||
| 1137 | } else { | ||
| 1138 | Exit(); | ||
| 1139 | } | ||
| 1140 | if (!is_true) { | ||
| 1141 | AddLine("ENDIF;"); | ||
| 1142 | } | ||
| 1143 | } else if (const auto ast = std::get_if<ASTBreak>(&*node->GetInnerData())) { | ||
| 1144 | if (ExprIsTrue(ast->condition)) { | ||
| 1145 | AddLine("BRK;"); | ||
| 1146 | } else { | ||
| 1147 | AddLine("MOVC.U RC.x, {};", VisitExpression(ast->condition)); | ||
| 1148 | AddLine("BRK (NE.x);"); | ||
| 1149 | ResetTemporaries(); | ||
| 1150 | } | ||
| 1151 | } else if (std::holds_alternative<ASTLabel>(*node->GetInnerData())) { | ||
| 1152 | // Nothing to do | ||
| 1153 | } else { | ||
| 1154 | UNREACHABLE(); | ||
| 1155 | } | ||
| 1156 | } | ||
| 1157 | |||
| 1158 | std::string ARBDecompiler::VisitExpression(const Expr& node) { | ||
| 1159 | if (const auto expr = std::get_if<ExprAnd>(&*node)) { | ||
| 1160 | const std::string result = AllocTemporary(); | ||
| 1161 | AddLine("AND.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1162 | VisitExpression(expr->operand2)); | ||
| 1163 | return result; | ||
| 1164 | } | ||
| 1165 | if (const auto expr = std::get_if<ExprOr>(&*node)) { | ||
| 1166 | const std::string result = AllocTemporary(); | ||
| 1167 | AddLine("OR.U {}, {}, {};", result, VisitExpression(expr->operand1), | ||
| 1168 | VisitExpression(expr->operand2)); | ||
| 1169 | return result; | ||
| 1170 | } | ||
| 1171 | if (const auto expr = std::get_if<ExprNot>(&*node)) { | ||
| 1172 | const std::string result = AllocTemporary(); | ||
| 1173 | AddLine("CMP.S {}, {}, 0, -1;", result, VisitExpression(expr->operand1)); | ||
| 1174 | return result; | ||
| 1175 | } | ||
| 1176 | if (const auto expr = std::get_if<ExprPredicate>(&*node)) { | ||
| 1177 | return fmt::format("P{}.x", static_cast<u64>(expr->predicate)); | ||
| 1178 | } | ||
| 1179 | if (const auto expr = std::get_if<ExprCondCode>(&*node)) { | ||
| 1180 | return Visit(ir.GetConditionCode(expr->cc)); | ||
| 1181 | } | ||
| 1182 | if (const auto expr = std::get_if<ExprVar>(&*node)) { | ||
| 1183 | return fmt::format("F{}.x", expr->var_index); | ||
| 1184 | } | ||
| 1185 | if (const auto expr = std::get_if<ExprBoolean>(&*node)) { | ||
| 1186 | return expr->value ? "0xffffffff" : "0"; | ||
| 1187 | } | ||
| 1188 | if (const auto expr = std::get_if<ExprGprEqual>(&*node)) { | ||
| 1189 | const std::string result = AllocTemporary(); | ||
| 1190 | AddLine("SEQ.U {}, R{}.x, {};", result, expr->gpr, expr->value); | ||
| 1191 | return result; | ||
| 1192 | } | ||
| 1193 | UNREACHABLE(); | ||
| 1194 | return "0"; | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | void ARBDecompiler::VisitBlock(const NodeBlock& bb) { | ||
| 1198 | for (const auto& node : bb) { | ||
| 1199 | Visit(node); | ||
| 1200 | } | ||
| 1201 | } | ||
| 1202 | |||
| 1203 | std::string ARBDecompiler::Visit(const Node& node) { | ||
| 1204 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 1205 | if (const auto amend_index = operation->GetAmendIndex()) { | ||
| 1206 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1207 | } | ||
| 1208 | const std::size_t index = static_cast<std::size_t>(operation->GetCode()); | ||
| 1209 | if (index >= OPERATION_DECOMPILERS.size()) { | ||
| 1210 | UNREACHABLE_MSG("Out of bounds operation: {}", index); | ||
| 1211 | return {}; | ||
| 1212 | } | ||
| 1213 | const auto decompiler = OPERATION_DECOMPILERS[index]; | ||
| 1214 | if (decompiler == nullptr) { | ||
| 1215 | UNREACHABLE_MSG("Undefined operation: {}", index); | ||
| 1216 | return {}; | ||
| 1217 | } | ||
| 1218 | return (this->*decompiler)(*operation); | ||
| 1219 | } | ||
| 1220 | |||
| 1221 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 1222 | const u32 index = gpr->GetIndex(); | ||
| 1223 | if (index == Register::ZeroIndex) { | ||
| 1224 | return "{0, 0, 0, 0}.x"; | ||
| 1225 | } | ||
| 1226 | return fmt::format("R{}.x", index); | ||
| 1227 | } | ||
| 1228 | |||
| 1229 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 1230 | return fmt::format("CV{}.x", cv->GetIndex()); | ||
| 1231 | } | ||
| 1232 | |||
| 1233 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | ||
| 1234 | const std::string temporary = AllocTemporary(); | ||
| 1235 | AddLine("MOV.U {}, {};", temporary, immediate->GetValue()); | ||
| 1236 | return temporary; | ||
| 1237 | } | ||
| 1238 | |||
| 1239 | if (const auto predicate = std::get_if<PredicateNode>(&*node)) { | ||
| 1240 | const std::string temporary = AllocTemporary(); | ||
| 1241 | switch (const auto index = predicate->GetIndex(); index) { | ||
| 1242 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1243 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1244 | break; | ||
| 1245 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1246 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1247 | break; | ||
| 1248 | default: | ||
| 1249 | AddLine("MOV.S {}, P{}.x;", temporary, static_cast<u64>(index)); | ||
| 1250 | break; | ||
| 1251 | } | ||
| 1252 | if (predicate->IsNegated()) { | ||
| 1253 | AddLine("CMP.S {}, {}, 0, -1;", temporary, temporary); | ||
| 1254 | } | ||
| 1255 | return temporary; | ||
| 1256 | } | ||
| 1257 | |||
| 1258 | if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||
| 1259 | if (abuf->IsPhysicalBuffer()) { | ||
| 1260 | UNIMPLEMENTED_MSG("Physical buffers are not implemented"); | ||
| 1261 | return "{0, 0, 0, 0}.x"; | ||
| 1262 | } | ||
| 1263 | |||
| 1264 | const auto buffer_index = [this, &abuf]() -> std::string { | ||
| 1265 | if (stage != ShaderType::Geometry) { | ||
| 1266 | return ""; | ||
| 1267 | } | ||
| 1268 | return fmt::format("[{}]", Visit(abuf->GetBuffer())); | ||
| 1269 | }; | ||
| 1270 | |||
| 1271 | const Attribute::Index index = abuf->GetIndex(); | ||
| 1272 | const u32 element = abuf->GetElement(); | ||
| 1273 | const char swizzle = Swizzle(element); | ||
| 1274 | switch (index) { | ||
| 1275 | case Attribute::Index::Position: { | ||
| 1276 | if (stage == ShaderType::Geometry) { | ||
| 1277 | return fmt::format("{}_position[{}].{}", StageInputName(stage), | ||
| 1278 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1279 | } else { | ||
| 1280 | return fmt::format("{}.position.{}", StageInputName(stage), swizzle); | ||
| 1281 | } | ||
| 1282 | } | ||
| 1283 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 1284 | ASSERT(stage == ShaderType::Vertex); | ||
| 1285 | switch (element) { | ||
| 1286 | case 2: | ||
| 1287 | return "vertex.instance"; | ||
| 1288 | case 3: | ||
| 1289 | return "vertex.id"; | ||
| 1290 | } | ||
| 1291 | UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element); | ||
| 1292 | break; | ||
| 1293 | case Attribute::Index::PointCoord: | ||
| 1294 | switch (element) { | ||
| 1295 | case 0: | ||
| 1296 | return "fragment.pointcoord.x"; | ||
| 1297 | case 1: | ||
| 1298 | return "fragment.pointcoord.y"; | ||
| 1299 | } | ||
| 1300 | UNIMPLEMENTED(); | ||
| 1301 | break; | ||
| 1302 | case Attribute::Index::FrontFacing: { | ||
| 1303 | ASSERT(stage == ShaderType::Fragment); | ||
| 1304 | ASSERT(element == 3); | ||
| 1305 | const std::string temporary = AllocVectorTemporary(); | ||
| 1306 | AddLine("SGT.S RC.x, fragment.facing, {{0, 0, 0, 0}};"); | ||
| 1307 | AddLine("MOV.U.CC RC.x, -RC;"); | ||
| 1308 | AddLine("MOV.S {}.x, 0;", temporary); | ||
| 1309 | AddLine("MOV.S {}.x (NE.x), -1;", temporary); | ||
| 1310 | return fmt::format("{}.x", temporary); | ||
| 1311 | } | ||
| 1312 | default: | ||
| 1313 | if (IsGenericAttribute(index)) { | ||
| 1314 | if (stage == ShaderType::Geometry) { | ||
| 1315 | return fmt::format("in_attr{}[{}][0].{}", GetGenericAttributeIndex(index), | ||
| 1316 | Visit(abuf->GetBuffer()), swizzle); | ||
| 1317 | } else { | ||
| 1318 | return fmt::format("{}.attrib[{}].{}", StageInputName(stage), | ||
| 1319 | GetGenericAttributeIndex(index), swizzle); | ||
| 1320 | } | ||
| 1321 | } | ||
| 1322 | UNIMPLEMENTED_MSG("Unimplemented input attribute={}", static_cast<int>(index)); | ||
| 1323 | break; | ||
| 1324 | } | ||
| 1325 | return "{0, 0, 0, 0}.x"; | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | ||
| 1329 | std::string offset_string; | ||
| 1330 | const auto& offset = cbuf->GetOffset(); | ||
| 1331 | if (const auto imm = std::get_if<ImmediateNode>(&*offset)) { | ||
| 1332 | offset_string = std::to_string(imm->GetValue()); | ||
| 1333 | } else { | ||
| 1334 | offset_string = Visit(offset); | ||
| 1335 | } | ||
| 1336 | const std::string temporary = AllocTemporary(); | ||
| 1337 | AddLine("LDC.F32 {}, cbuf{}[{}];", temporary, cbuf->GetIndex(), offset_string); | ||
| 1338 | return temporary; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||
| 1342 | const std::string temporary = AllocTemporary(); | ||
| 1343 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1344 | Visit(gmem->GetBaseAddress())); | ||
| 1345 | AddLine("LDB.U32 {}, {}[{}];", temporary, GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1346 | temporary); | ||
| 1347 | return temporary; | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||
| 1351 | const std::string temporary = AllocTemporary(); | ||
| 1352 | AddLine("SHR.U {}, {}, 2;", temporary, Visit(lmem->GetAddress())); | ||
| 1353 | AddLine("MOV.U {}, lmem[{}].x;", temporary, temporary); | ||
| 1354 | return temporary; | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||
| 1358 | const std::string temporary = AllocTemporary(); | ||
| 1359 | AddLine("LDS.U32 {}, shared_mem[{}];", temporary, Visit(smem->GetAddress())); | ||
| 1360 | return temporary; | ||
| 1361 | } | ||
| 1362 | |||
| 1363 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||
| 1364 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1365 | return fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 1369 | if (const auto amend_index = conditional->GetAmendIndex()) { | ||
| 1370 | Visit(ir.GetAmendNode(*amend_index)); | ||
| 1371 | } | ||
| 1372 | AddLine("MOVC.U RC.x, {};", Visit(conditional->GetCondition())); | ||
| 1373 | AddLine("IF NE.x;"); | ||
| 1374 | VisitBlock(conditional->GetCode()); | ||
| 1375 | AddLine("ENDIF;"); | ||
| 1376 | return {}; | ||
| 1377 | } | ||
| 1378 | |||
| 1379 | if (const auto cmt = std::get_if<CommentNode>(&*node)) { | ||
| 1380 | // Uncommenting this would emit invalid code: GLASM has no "//" comments. | ||
| 1381 | // AddLine("// {}", cmt->GetText()); | ||
| 1382 | return {}; | ||
| 1383 | } | ||
| 1384 | |||
| 1385 | UNIMPLEMENTED(); | ||
| 1386 | return {}; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | std::pair<std::string, std::size_t> ARBDecompiler::BuildCoords(Operation operation) { | ||
| 1390 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1391 | UNIMPLEMENTED_IF(meta.sampler.is_indexed); | ||
| 1392 | UNIMPLEMENTED_IF(meta.sampler.is_shadow && meta.sampler.is_array && | ||
| 1393 | meta.sampler.type == Tegra::Shader::TextureType::TextureCube); | ||
| 1394 | |||
| 1395 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1396 | std::string temporary = AllocVectorTemporary(); | ||
| 1397 | std::size_t i = 0; | ||
| 1398 | for (; i < count; ++i) { | ||
| 1399 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1400 | } | ||
| 1401 | if (meta.sampler.is_array) { | ||
| 1402 | AddLine("I2F.S {}.{}, {};", temporary, Swizzle(i++), Visit(meta.array)); | ||
| 1403 | } | ||
| 1404 | if (meta.sampler.is_shadow) { | ||
| 1405 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i++), Visit(meta.depth_compare)); | ||
| 1406 | } | ||
| 1407 | return {std::move(temporary), i}; | ||
| 1408 | } | ||
| 1409 | |||
| 1410 | std::string ARBDecompiler::BuildAoffi(Operation operation) { | ||
| 1411 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1412 | if (meta.aoffi.empty()) { | ||
| 1413 | return {}; | ||
| 1414 | } | ||
| 1415 | const std::string temporary = AllocVectorTemporary(); | ||
| 1416 | std::size_t i = 0; | ||
| 1417 | for (auto& node : meta.aoffi) { | ||
| 1418 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i++), Visit(node)); | ||
| 1419 | } | ||
| 1420 | return fmt::format(", offset({})", temporary); | ||
| 1421 | } | ||
| 1422 | |||
| 1423 | void ARBDecompiler::Exit() { | ||
| 1424 | if (stage != ShaderType::Fragment) { | ||
| 1425 | AddLine("RET;"); | ||
| 1426 | return; | ||
| 1427 | } | ||
| 1428 | |||
| 1429 | const auto safe_get_register = [this](u32 reg) -> std::string { | ||
| 1430 | // TODO(Rodrigo): Replace with contains once C++20 releases | ||
| 1431 | const auto& used_registers = ir.GetRegisters(); | ||
| 1432 | if (used_registers.find(reg) != used_registers.end()) { | ||
| 1433 | return fmt::format("R{}.x", reg); | ||
| 1434 | } | ||
| 1435 | return "{0, 0, 0, 0}.x"; | ||
| 1436 | }; | ||
| 1437 | |||
| 1438 | const auto& header = ir.GetHeader(); | ||
| 1439 | u32 current_reg = 0; | ||
| 1440 | for (u32 rt = 0; rt < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++rt) { | ||
| 1441 | for (u32 component = 0; component < 4; ++component) { | ||
| 1442 | if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { | ||
| 1443 | continue; | ||
| 1444 | } | ||
| 1445 | AddLine("MOV.F result_color{}.{}, {};", rt, Swizzle(component), | ||
| 1446 | safe_get_register(current_reg)); | ||
| 1447 | ++current_reg; | ||
| 1448 | } | ||
| 1449 | } | ||
| 1450 | if (header.ps.omap.depth) { | ||
| 1451 | AddLine("MOV.F result.depth.z, {};", safe_get_register(current_reg + 1)); | ||
| 1452 | } | ||
| 1453 | |||
| 1454 | AddLine("RET;"); | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | std::string ARBDecompiler::Assign(Operation operation) { | ||
| 1458 | const Node& dest = operation[0]; | ||
| 1459 | const Node& src = operation[1]; | ||
| 1460 | |||
| 1461 | std::string dest_name; | ||
| 1462 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | ||
| 1463 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 1464 | // Writing to Register::ZeroIndex is a no-op | ||
| 1465 | return {}; | ||
| 1466 | } | ||
| 1467 | dest_name = fmt::format("R{}.x", gpr->GetIndex()); | ||
| 1468 | } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { | ||
| 1469 | const u32 element = abuf->GetElement(); | ||
| 1470 | const char swizzle = Swizzle(element); | ||
| 1471 | switch (const Attribute::Index index = abuf->GetIndex()) { | ||
| 1472 | case Attribute::Index::Position: | ||
| 1473 | dest_name = fmt::format("result.position.{}", swizzle); | ||
| 1474 | break; | ||
| 1475 | case Attribute::Index::LayerViewportPointSize: | ||
| 1476 | switch (element) { | ||
| 1477 | case 0: | ||
| 1478 | UNIMPLEMENTED(); | ||
| 1479 | return {}; | ||
| 1480 | case 1: | ||
| 1481 | case 2: | ||
| 1482 | if (!device.HasNvViewportArray2()) { | ||
| 1483 | LOG_ERROR( | ||
| 1484 | Render_OpenGL, | ||
| 1485 | "NV_viewport_array2 is missing. Maxwell gen 2 or better is required."); | ||
| 1486 | return {}; | ||
| 1487 | } | ||
| 1488 | dest_name = element == 1 ? "result.layer.x" : "result.viewport.x"; | ||
| 1489 | break; | ||
| 1490 | case 3: | ||
| 1491 | dest_name = "result.pointsize.x"; | ||
| 1492 | break; | ||
| 1493 | } | ||
| 1494 | break; | ||
| 1495 | case Attribute::Index::ClipDistances0123: | ||
| 1496 | dest_name = fmt::format("result.clip[{}].x", element); | ||
| 1497 | break; | ||
| 1498 | case Attribute::Index::ClipDistances4567: | ||
| 1499 | dest_name = fmt::format("result.clip[{}].x", element + 4); | ||
| 1500 | break; | ||
| 1501 | default: | ||
| 1502 | if (!IsGenericAttribute(index)) { | ||
| 1503 | UNREACHABLE(); | ||
| 1504 | return {}; | ||
| 1505 | } | ||
| 1506 | dest_name = | ||
| 1507 | fmt::format("result.attrib[{}].{}", GetGenericAttributeIndex(index), swizzle); | ||
| 1508 | break; | ||
| 1509 | } | ||
| 1510 | } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||
| 1511 | const std::string address = AllocTemporary(); | ||
| 1512 | AddLine("SHR.U {}, {}, 2;", address, Visit(lmem->GetAddress())); | ||
| 1513 | dest_name = fmt::format("lmem[{}].x", address); | ||
| 1514 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||
| 1515 | AddLine("STS.U32 {}, shared_mem[{}];", Visit(src), Visit(smem->GetAddress())); | ||
| 1516 | ResetTemporaries(); | ||
| 1517 | return {}; | ||
| 1518 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||
| 1519 | const std::string temporary = AllocTemporary(); | ||
| 1520 | AddLine("SUB.U {}, {}, {};", temporary, Visit(gmem->GetRealAddress()), | ||
| 1521 | Visit(gmem->GetBaseAddress())); | ||
| 1522 | AddLine("STB.U32 {}, {}[{}];", Visit(src), GlobalMemoryName(gmem->GetDescriptor()), | ||
| 1523 | temporary); | ||
| 1524 | ResetTemporaries(); | ||
| 1525 | return {}; | ||
| 1526 | } else { | ||
| 1527 | UNREACHABLE(); | ||
| 1528 | ResetTemporaries(); | ||
| 1529 | return {}; | ||
| 1530 | } | ||
| 1531 | |||
| 1532 | AddLine("MOV.U {}, {};", dest_name, Visit(src)); | ||
| 1533 | ResetTemporaries(); | ||
| 1534 | return {}; | ||
| 1535 | } | ||
| 1536 | |||
| 1537 | std::string ARBDecompiler::Select(Operation operation) { | ||
| 1538 | const std::string temporary = AllocTemporary(); | ||
| 1539 | AddLine("CMP.S {}, {}, {}, {};", temporary, Visit(operation[0]), Visit(operation[1]), | ||
| 1540 | Visit(operation[2])); | ||
| 1541 | return temporary; | ||
| 1542 | } | ||
| 1543 | |||
| 1544 | std::string ARBDecompiler::FClamp(Operation operation) { | ||
| 1545 | // 1.0f in hex, replace with std::bit_cast on C++20 | ||
| 1546 | static constexpr u32 POSITIVE_ONE = 0x3f800000; | ||
| 1547 | |||
| 1548 | const std::string temporary = AllocTemporary(); | ||
| 1549 | const Node& value = operation[0]; | ||
| 1550 | const Node& low = operation[1]; | ||
| 1551 | const Node& high = operation[2]; | ||
| 1552 | const auto imm_low = std::get_if<ImmediateNode>(&*low); | ||
| 1553 | const auto imm_high = std::get_if<ImmediateNode>(&*high); | ||
| 1554 | if (imm_low && imm_high && imm_low->GetValue() == 0 && imm_high->GetValue() == POSITIVE_ONE) { | ||
| 1555 | AddLine("MOV.F32.SAT {}, {};", temporary, Visit(value)); | ||
| 1556 | } else { | ||
| 1557 | AddLine("MIN.F {}, {}, {};", temporary, Visit(value), Visit(high)); | ||
| 1558 | AddLine("MAX.F {}, {}, {};", temporary, temporary, Visit(low)); | ||
| 1559 | } | ||
| 1560 | return temporary; | ||
| 1561 | } | ||
| 1562 | |||
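The saturate fast path keys on the raw IEEE-754 bits of 1.0f. Once C++20 is available (as the comment above anticipates), the constant can be verified at compile time:

```cpp
#include <bit>
#include <cstdint>

static_assert(std::bit_cast<std::uint32_t>(1.0f) == 0x3f800000,
              "POSITIVE_ONE is the IEEE-754 bit pattern of 1.0f");
```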
| 1563 | std::string ARBDecompiler::FCastHalf0(Operation operation) { | ||
| 1564 | const std::string temporary = AllocVectorTemporary(); | ||
| 1565 | AddLine("UP2H.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1566 | return fmt::format("{}.x", temporary); | ||
| 1567 | } | ||
| 1568 | |||
| 1569 | std::string ARBDecompiler::FCastHalf1(Operation operation) { | ||
| 1570 | const std::string temporary = AllocVectorTemporary(); | ||
| 1571 | AddLine("UP2H.F {}.y, {};", temporary, Visit(operation[0])); | ||
| 1572 | AddLine("MOV {}.x, {}.y;", temporary, temporary); | ||
| 1573 | return fmt::format("{}.x", temporary); | ||
| 1574 | } | ||
| 1575 | |||
| 1576 | std::string ARBDecompiler::FSqrt(Operation operation) { | ||
| 1577 | const std::string temporary = AllocTemporary(); | ||
| 1578 | AddLine("RSQ.F32 {}, {};", temporary, Visit(operation[0])); | ||
| 1579 | AddLine("RCP.F32 {}, {};", temporary, temporary); | ||
| 1580 | return temporary; | ||
| 1581 | } | ||
| 1582 | |||
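There is no direct square-root instruction here, so FSqrt uses the identity sqrt(x) = 1 / rsqrt(x). A scalar reference of the emitted RSQ + RCP pair (a sketch, not the shipped code):

```cpp
#include <cassert>
#include <cmath>

float EmulatedSqrt(float x) {
    const float rsq = 1.0f / std::sqrt(x); // RSQ.F32: reciprocal square root
    return 1.0f / rsq;                     // RCP.F32: reciprocal
}

int main() {
    assert(std::abs(EmulatedSqrt(9.0f) - 3.0f) < 1e-5f);
}
```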
| 1583 | std::string ARBDecompiler::FSwizzleAdd(Operation operation) { | ||
| 1584 | const std::string temporary = AllocVectorTemporary(); | ||
| 1585 | if (!device.HasWarpIntrinsics()) { | ||
| 1586 | LOG_ERROR(Render_OpenGL, | ||
| 1587 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 1588 | AddLine("ADD.F {}.x, {}, {};", temporary, Visit(operation[0]), Visit(operation[1])); | ||
| 1589 | return fmt::format("{}.x", temporary); | ||
| 1590 | } | ||
| 1591 | const std::string lut = AllocVectorTemporary(); | ||
| 1592 | AddLine("AND.U {}.z, {}.threadid, 3;", temporary, StageInputName(stage)); | ||
| 1593 | AddLine("SHL.U {}.z, {}.z, 1;", temporary, temporary); | ||
| 1594 | AddLine("SHR.U {}.z, {}, {}.z;", temporary, Visit(operation[2]), temporary); | ||
| 1595 | AddLine("AND.U {}.z, {}.z, 3;", temporary, temporary); | ||
| 1596 | AddLine("MUL.F32 {}.x, {}, FSWZA[{}.z];", temporary, Visit(operation[0]), temporary); | ||
| 1597 | AddLine("MUL.F32 {}.y, {}, FSWZB[{}.z];", temporary, Visit(operation[1]), temporary); | ||
| 1598 | AddLine("ADD.F32 {}.x, {}.x, {}.y;", temporary, temporary, temporary); | ||
| 1599 | return fmt::format("{}.x", temporary); | ||
| 1600 | } | ||
| 1601 | |||
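A scalar reference for the emitted FSWZADD sequence, with the lookup tables mirroring the FSWZA/FSWZB values written in InitializeVariables above (illustrative only):

```cpp
#include <cstdint>

float FSwizzleAddRef(float a, float b, std::uint32_t mask, std::uint32_t thread_id) {
    static constexpr float fswza[4] = {-1.0f, 1.0f, -1.0f, 0.0f};
    static constexpr float fswzb[4] = {-1.0f, -1.0f, 1.0f, -1.0f};
    const std::uint32_t lane = thread_id & 3;             // AND.U ..., threadid, 3
    const std::uint32_t index = (mask >> (lane * 2)) & 3; // SHL.U + SHR.U + AND.U
    return a * fswza[index] + b * fswzb[index];           // MUL.F32 x2 + ADD.F32
}
```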
| 1602 | std::string ARBDecompiler::HAdd2(Operation operation) { | ||
| 1603 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1604 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1605 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1606 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1607 | AddLine("ADD.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1608 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1609 | return fmt::format("{}.x", tmp1); | ||
| 1610 | } | ||
| 1611 | |||
| 1612 | std::string ARBDecompiler::HMul2(Operation operation) { | ||
| 1613 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1614 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1615 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1616 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1617 | AddLine("MUL.F16 {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1618 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1619 | return fmt::format("{}.x", tmp1); | ||
| 1620 | } | ||
| 1621 | |||
| 1622 | std::string ARBDecompiler::HFma2(Operation operation) { | ||
| 1623 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1624 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1625 | const std::string tmp3 = AllocVectorTemporary(); | ||
| 1626 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1627 | AddLine("UP2H.F {}.xy, {};", tmp2, Visit(operation[1])); | ||
| 1628 | AddLine("UP2H.F {}.xy, {};", tmp3, Visit(operation[2])); | ||
| 1629 | AddLine("MAD.F16 {}, {}, {}, {};", tmp1, tmp1, tmp2, tmp3); | ||
| 1630 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1631 | return fmt::format("{}.x", tmp1); | ||
| 1632 | } | ||
| 1633 | |||
| 1634 | std::string ARBDecompiler::HAbsolute(Operation operation) { | ||
| 1635 | const std::string temporary = AllocVectorTemporary(); | ||
| 1636 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1637 | AddLine("PK2H.F {}.x, |{}|;", temporary, temporary); | ||
| 1638 | return fmt::format("{}.x", temporary); | ||
| 1639 | } | ||
| 1640 | |||
| 1641 | std::string ARBDecompiler::HNegate(Operation operation) { | ||
| 1642 | const std::string temporary = AllocVectorTemporary(); | ||
| 1643 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1644 | AddLine("MOVC.S RC.x, {};", Visit(operation[1])); | ||
| 1645 | AddLine("MOV.F {}.x (NE.x), -{}.x;", temporary, temporary); | ||
| 1646 | AddLine("MOVC.S RC.x, {};", Visit(operation[2])); | ||
| 1647 | AddLine("MOV.F {}.y (NE.x), -{}.y;", temporary, temporary); | ||
| 1648 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1649 | return fmt::format("{}.x", temporary); | ||
| 1650 | } | ||
| 1651 | |||
| 1652 | std::string ARBDecompiler::HClamp(Operation operation) { | ||
| 1653 | const std::string tmp1 = AllocVectorTemporary(); | ||
| 1654 | const std::string tmp2 = AllocVectorTemporary(); | ||
| 1655 | AddLine("UP2H.F {}.xy, {};", tmp1, Visit(operation[0])); | ||
| 1656 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[1])); | ||
| 1657 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1658 | AddLine("MAX.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1659 | AddLine("MOV.U {}.x, {};", tmp2, Visit(operation[2])); | ||
| 1660 | AddLine("MOV.U {}.y, {}.x;", tmp2, tmp2); | ||
| 1661 | AddLine("MIN.F {}, {}, {};", tmp1, tmp1, tmp2); | ||
| 1662 | AddLine("PK2H.F {}.x, {};", tmp1, tmp1); | ||
| 1663 | return fmt::format("{}.x", tmp1); | ||
| 1664 | } | ||
| 1665 | |||
| 1666 | std::string ARBDecompiler::HCastFloat(Operation operation) { | ||
| 1667 | const std::string temporary = AllocVectorTemporary(); | ||
| 1668 | AddLine("MOV.F {}.y, {{0, 0, 0, 0}};", temporary); | ||
| 1669 | AddLine("MOV.F {}.x, {};", temporary, Visit(operation[0])); | ||
| 1670 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1671 | return fmt::format("{}.x", temporary); | ||
| 1672 | } | ||
| 1673 | |||
| 1674 | std::string ARBDecompiler::HUnpack(Operation operation) { | ||
| 1675 | const std::string operand = Visit(operation[0]); | ||
| 1676 | switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) { | ||
| 1677 | case Tegra::Shader::HalfType::H0_H1: | ||
| 1678 | return operand; | ||
| 1679 | case Tegra::Shader::HalfType::F32: { | ||
| 1680 | const std::string temporary = AllocVectorTemporary(); | ||
| 1681 | AddLine("MOV.U {}.x, {};", temporary, operand); | ||
| 1682 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1683 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1684 | return fmt::format("{}.x", temporary); | ||
| 1685 | } | ||
| 1686 | case Tegra::Shader::HalfType::H0_H0: { | ||
| 1687 | const std::string temporary = AllocVectorTemporary(); | ||
| 1688 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1689 | AddLine("MOV.U {}.y, {}.x;", temporary, temporary); | ||
| 1690 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1691 | return fmt::format("{}.x", temporary); | ||
| 1692 | } | ||
| 1693 | case Tegra::Shader::HalfType::H1_H1: { | ||
| 1694 | const std::string temporary = AllocVectorTemporary(); | ||
| 1695 | AddLine("UP2H.F {}.xy, {};", temporary, operand); | ||
| 1696 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 1697 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1698 | return fmt::format("{}.x", temporary); | ||
| 1699 | } | ||
| 1700 | } | ||
| 1701 | UNREACHABLE(); | ||
| 1702 | return "{0, 0, 0, 0}.x"; | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | std::string ARBDecompiler::HMergeF32(Operation operation) { | ||
| 1706 | const std::string temporary = AllocVectorTemporary(); | ||
| 1707 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1708 | return fmt::format("{}.x", temporary); | ||
| 1709 | } | ||
| 1710 | |||
| 1711 | std::string ARBDecompiler::HMergeH0(Operation operation) { | ||
| 1712 | const std::string temporary = AllocVectorTemporary(); | ||
| 1713 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1714 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1715 | AddLine("MOV.U {}.x, {}.z;", temporary, temporary); | ||
| 1716 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1717 | return fmt::format("{}.x", temporary); | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | std::string ARBDecompiler::HMergeH1(Operation operation) { | ||
| 1721 | const std::string temporary = AllocVectorTemporary(); | ||
| 1722 | AddLine("UP2H.F {}.xy, {};", temporary, Visit(operation[0])); | ||
| 1723 | AddLine("UP2H.F {}.zw, {};", temporary, Visit(operation[1])); | ||
| 1724 | AddLine("MOV.U {}.y, {}.w;", temporary, temporary); | ||
| 1725 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1726 | return fmt::format("{}.x", temporary); | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | std::string ARBDecompiler::HPack2(Operation operation) { | ||
| 1730 | const std::string temporary = AllocVectorTemporary(); | ||
| 1731 | AddLine("MOV.U {}.x, {};", temporary, Visit(operation[0])); | ||
| 1732 | AddLine("MOV.U {}.y, {};", temporary, Visit(operation[1])); | ||
| 1733 | AddLine("PK2H.F {}.x, {};", temporary, temporary); | ||
| 1734 | return fmt::format("{}.x", temporary); | ||
| 1735 | } | ||
| 1736 | |||
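All of the half-precision helpers round-trip through UP2H/PK2H, which unpack and pack two 16-bit floats in one 32-bit register. A layout sketch, assuming the usual convention that .x maps to the low half; the f32-to-f16 conversion itself is elided, so lo and hi are already f16 bit patterns:

```cpp
#include <cstdint>

std::uint32_t Pack2H(std::uint16_t lo, std::uint16_t hi) { // PK2H
    return static_cast<std::uint32_t>(lo) | (static_cast<std::uint32_t>(hi) << 16);
}

std::uint16_t Unpack2H_X(std::uint32_t packed) { // UP2H, .x component
    return static_cast<std::uint16_t>(packed & 0xffff);
}

std::uint16_t Unpack2H_Y(std::uint32_t packed) { // UP2H, .y component
    return static_cast<std::uint16_t>(packed >> 16);
}
```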
| 1737 | std::string ARBDecompiler::LogicalAssign(Operation operation) { | ||
| 1738 | const Node& dest = operation[0]; | ||
| 1739 | const Node& src = operation[1]; | ||
| 1740 | |||
| 1741 | std::string target; | ||
| 1742 | |||
| 1743 | if (const auto pred = std::get_if<PredicateNode>(&*dest)) { | ||
| 1744 | ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); | ||
| 1745 | |||
| 1746 | const Tegra::Shader::Pred index = pred->GetIndex(); | ||
| 1747 | switch (index) { | ||
| 1748 | case Tegra::Shader::Pred::NeverExecute: | ||
| 1749 | case Tegra::Shader::Pred::UnusedIndex: | ||
| 1750 | // Writing to these predicates is a no-op | ||
| 1751 | return {}; | ||
| 1752 | } | ||
| 1753 | target = fmt::format("P{}.x", static_cast<u64>(index)); | ||
| 1754 | } else if (const auto internal_flag = std::get_if<InternalFlagNode>(&*dest)) { | ||
| 1755 | const std::size_t index = static_cast<std::size_t>(internal_flag->GetFlag()); | ||
| 1756 | target = fmt::format("{}.x", INTERNAL_FLAG_NAMES[index]); | ||
| 1757 | } else { | ||
| 1758 | UNREACHABLE(); | ||
| 1759 | ResetTemporaries(); | ||
| 1760 | return {}; | ||
| 1761 | } | ||
| 1762 | |||
| 1763 | AddLine("MOV.U {}, {};", target, Visit(src)); | ||
| 1764 | ResetTemporaries(); | ||
| 1765 | return {}; | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | std::string ARBDecompiler::LogicalPick2(Operation operation) { | ||
| 1769 | const std::string temporary = AllocTemporary(); | ||
| 1770 | const u32 index = std::get<ImmediateNode>(*operation[1]).GetValue(); | ||
| 1771 | AddLine("MOV.U {}, {}.{};", temporary, Visit(operation[0]), Swizzle(index)); | ||
| 1772 | return temporary; | ||
| 1773 | } | ||
| 1774 | |||
| 1775 | std::string ARBDecompiler::LogicalAnd2(Operation operation) { | ||
| 1776 | const std::string temporary = AllocTemporary(); | ||
| 1777 | const std::string op = Visit(operation[0]); | ||
| 1778 | AddLine("AND.U {}, {}.x, {}.y;", temporary, op, op); | ||
| 1779 | return temporary; | ||
| 1780 | } | ||
| 1781 | |||
| 1782 | std::string ARBDecompiler::FloatOrdered(Operation operation) { | ||
| 1783 | const std::string temporary = AllocTemporary(); | ||
| 1784 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1785 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1786 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1787 | AddLine("MOV.S {} (NAN.x), 0;", temporary); | ||
| 1788 | AddLine("MOV.S {} (NAN.y), 0;", temporary); | ||
| 1789 | return temporary; | ||
| 1790 | } | ||
| 1791 | |||
| 1792 | std::string ARBDecompiler::FloatUnordered(Operation operation) { | ||
| 1793 | const std::string temporary = AllocTemporary(); | ||
| 1794 | AddLine("MOVC.F32 RC.x, {};", Visit(operation[0])); | ||
| 1795 | AddLine("MOVC.F32 RC.y, {};", Visit(operation[1])); | ||
| 1796 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1797 | AddLine("MOV.S {} (NAN.x), -1;", temporary); | ||
| 1798 | AddLine("MOV.S {} (NAN.y), -1;", temporary); | ||
| 1799 | return temporary; | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | std::string ARBDecompiler::LogicalAddCarry(Operation operation) { | ||
| 1803 | const std::string temporary = AllocTemporary(); | ||
| 1804 | AddLine("ADDC.U RC, {}, {};", Visit(operation[0]), Visit(operation[1])); | ||
| 1805 | AddLine("MOV.S {}, 0;", temporary); | ||
| 1806 | AddLine("IF CF.x;"); | ||
| 1807 | AddLine("MOV.S {}, -1;", temporary); | ||
| 1808 | AddLine("ENDIF;"); | ||
| 1809 | return temporary; | ||
| 1810 | } | ||
| 1811 | |||
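ADDC sets the carry flag (CF.x) on unsigned overflow, which the code above converts into a boolean register. A scalar reference for the carry test:

```cpp
#include <cstdint>

bool AddCarry(std::uint32_t a, std::uint32_t b) {
    return static_cast<std::uint32_t>(a + b) < a; // carry out of bit 31, i.e. CF.x
}
```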
| 1812 | std::string ARBDecompiler::Texture(Operation operation) { | ||
| 1813 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1814 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1815 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1816 | |||
| 1817 | std::string_view opcode = "TEX"; | ||
| 1818 | std::string extra; | ||
| 1819 | if (meta.bias) { | ||
| 1820 | ASSERT(!meta.lod); | ||
| 1821 | opcode = "TXB"; | ||
| 1822 | |||
| 1823 | if (swizzle < 4) { | ||
| 1824 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.bias)); | ||
| 1825 | } else { | ||
| 1826 | const std::string bias = AllocTemporary(); | ||
| 1827 | AddLine("MOV.F {}, {};", bias, Visit(meta.bias)); | ||
| 1828 | extra = fmt::format(" {},", bias); | ||
| 1829 | } | ||
| 1830 | } | ||
| 1831 | if (meta.lod) { | ||
| 1832 | ASSERT(!meta.bias); | ||
| 1833 | opcode = "TXL"; | ||
| 1834 | |||
| 1835 | if (swizzle < 4) { | ||
| 1836 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1837 | } else { | ||
| 1838 | const std::string lod = AllocTemporary(); | ||
| 1839 | AddLine("MOV.F {}, {};", lod, Visit(meta.lod)); | ||
| 1840 | extra = fmt::format(" {},", lod); | ||
| 1841 | } | ||
| 1842 | } | ||
| 1843 | |||
| 1844 | AddLine("{}.F {}, {},{} texture[{}], {}{};", opcode, temporary, temporary, extra, sampler_id, | ||
| 1845 | TextureType(meta), BuildAoffi(operation)); | ||
| 1846 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1847 | return fmt::format("{}.x", temporary); | ||
| 1848 | } | ||
| 1849 | |||
| 1850 | std::string ARBDecompiler::TextureGather(Operation operation) { | ||
| 1851 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1852 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1853 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1854 | |||
| 1855 | std::string comp; | ||
| 1856 | if (!meta.sampler.is_shadow) { | ||
| 1857 | const auto& immediate = std::get<ImmediateNode>(*meta.component); | ||
| 1858 | comp = fmt::format(".{}", Swizzle(immediate.GetValue())); | ||
| 1859 | } | ||
| 1860 | |||
| 1861 | AddLine("TXG.F {}, {}, texture[{}]{}, {}{};", temporary, temporary, sampler_id, comp, | ||
| 1862 | TextureType(meta), BuildAoffi(operation)); | ||
| 1863 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1864 | return fmt::format("{}.x", temporary); | ||
| 1865 | } | ||
| 1866 | |||
| 1867 | std::string ARBDecompiler::TextureQueryDimensions(Operation operation) { | ||
| 1868 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1869 | const std::string temporary = AllocVectorTemporary(); | ||
| 1870 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1871 | |||
| 1872 | ASSERT(!meta.sampler.is_array); | ||
| 1873 | |||
| 1874 | const std::string lod = operation.GetOperandsCount() > 0 ? Visit(operation[0]) : "0"; | ||
| 1875 | AddLine("TXQ {}, {}, texture[{}], {};", temporary, lod, sampler_id, TextureType(meta)); | ||
| 1876 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1877 | return fmt::format("{}.x", temporary); | ||
| 1878 | } | ||
| 1879 | |||
| 1880 | std::string ARBDecompiler::TextureQueryLod(Operation operation) { | ||
| 1881 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1882 | const std::string temporary = AllocVectorTemporary(); | ||
| 1883 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1884 | |||
| 1885 | ASSERT(!meta.sampler.is_array); | ||
| 1886 | |||
| 1887 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1888 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1889 | AddLine("MOV.F {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1890 | } | ||
| 1891 | AddLine("LOD.F {}, {}, texture[{}], {};", temporary, temporary, sampler_id, TextureType(meta)); | ||
| 1892 | AddLine("MUL.F32 {}, {}, {{256, 256, 0, 0}};", temporary, temporary); | ||
| 1893 | AddLine("TRUNC.S {}, {};", temporary, temporary); | ||
| 1894 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1895 | return fmt::format("{}.x", temporary); | ||
| 1896 | } | ||
| 1897 | |||
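The MUL by 256 and TRUNC pair appears to convert the float LOD into a fixed-point value with 8 fractional bits, matching what the guest expects back; a sketch under that assumption:

```cpp
int EncodeLod(float lod) {
    return static_cast<int>(lod * 256.0f); // truncates toward zero, like TRUNC.S
}
```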
| 1898 | std::string ARBDecompiler::TexelFetch(Operation operation) { | ||
| 1899 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1900 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1901 | const auto [temporary, swizzle] = BuildCoords(operation); | ||
| 1902 | |||
| 1903 | if (!meta.sampler.is_buffer) { | ||
| 1904 | ASSERT(swizzle < 4); | ||
| 1905 | AddLine("MOV.F {}.w, {};", temporary, Visit(meta.lod)); | ||
| 1906 | } | ||
| 1907 | AddLine("TXF.F {}, {}, texture[{}], {}{};", temporary, temporary, sampler_id, TextureType(meta), | ||
| 1908 | BuildAoffi(operation)); | ||
| 1909 | AddLine("MOV.U {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1910 | return fmt::format("{}.x", temporary); | ||
| 1911 | } | ||
| 1912 | |||
| 1913 | std::string ARBDecompiler::TextureGradient(Operation operation) { | ||
| 1914 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1915 | const u32 sampler_id = device.GetBaseBindings(stage).sampler + meta.sampler.index; | ||
| 1916 | const std::string ddx = AllocVectorTemporary(); | ||
| 1917 | const std::string ddy = AllocVectorTemporary(); | ||
| 1918 | const std::string coord = BuildCoords(operation).first; | ||
| 1919 | |||
| 1920 | const std::size_t num_components = meta.derivates.size() / 2; | ||
| 1921 | for (std::size_t index = 0; index < num_components; ++index) { | ||
| 1922 | const char swizzle = Swizzle(index); | ||
| 1923 | AddLine("MOV.F {}.{}, {};", ddx, swizzle, Visit(meta.derivates[index * 2])); | ||
| 1924 | AddLine("MOV.F {}.{}, {};", ddy, swizzle, Visit(meta.derivates[index * 2 + 1])); | ||
| 1925 | } | ||
| 1926 | |||
| 1927 | const std::string_view result = coord; | ||
| 1928 | AddLine("TXD.F {}, {}, {}, {}, texture[{}], {}{};", result, coord, ddx, ddy, sampler_id, | ||
| 1929 | TextureType(meta), BuildAoffi(operation)); | ||
| 1930 | AddLine("MOV.F {}.x, {}.{};", result, result, Swizzle(meta.element)); | ||
| 1931 | return fmt::format("{}.x", result); | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | std::string ARBDecompiler::ImageLoad(Operation operation) { | ||
| 1935 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1936 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1937 | const std::size_t count = operation.GetOperandsCount(); | ||
| 1938 | const std::string_view type = ImageType(meta.image.type); | ||
| 1939 | |||
| 1940 | const std::string temporary = AllocVectorTemporary(); | ||
| 1941 | for (std::size_t i = 0; i < count; ++i) { | ||
| 1942 | AddLine("MOV.S {}.{}, {};", temporary, Swizzle(i), Visit(operation[i])); | ||
| 1943 | } | ||
| 1944 | AddLine("LOADIM.F {}, {}, image[{}], {};", temporary, temporary, image_id, type); | ||
| 1945 | AddLine("MOV.F {}.x, {}.{};", temporary, temporary, Swizzle(meta.element)); | ||
| 1946 | return fmt::format("{}.x", temporary); | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | std::string ARBDecompiler::ImageStore(Operation operation) { | ||
| 1950 | const auto& meta = std::get<MetaImage>(operation.GetMeta()); | ||
| 1951 | const u32 image_id = device.GetBaseBindings(stage).image + meta.image.index; | ||
| 1952 | const std::size_t num_coords = operation.GetOperandsCount(); | ||
| 1953 | const std::size_t num_values = meta.values.size(); | ||
| 1954 | const std::string_view type = ImageType(meta.image.type); | ||
| 1955 | |||
| 1956 | const std::string coord = AllocVectorTemporary(); | ||
| 1957 | const std::string value = AllocVectorTemporary(); | ||
| 1958 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 1959 | AddLine("MOV.S {}.{}, {};", coord, Swizzle(i), Visit(operation[i])); | ||
| 1960 | } | ||
| 1961 | for (std::size_t i = 0; i < num_values; ++i) { | ||
| 1962 | AddLine("MOV.F {}.{}, {};", value, Swizzle(i), Visit(meta.values[i])); | ||
| 1963 | } | ||
| 1964 | AddLine("STOREIM.F image[{}], {}, {}, {};", image_id, value, coord, type); | ||
| 1965 | return {}; | ||
| 1966 | } | ||
| 1967 | |||
| 1968 | std::string ARBDecompiler::Branch(Operation operation) { | ||
| 1969 | const auto target = std::get<ImmediateNode>(*operation[0]); | ||
| 1970 | AddLine("MOV.U PC.x, {};", target.GetValue()); | ||
| 1971 | AddLine("CONT;"); | ||
| 1972 | return {}; | ||
| 1973 | } | ||
| 1974 | |||
| 1975 | std::string ARBDecompiler::BranchIndirect(Operation operation) { | ||
| 1976 | AddLine("MOV.U PC.x, {};", Visit(operation[0])); | ||
| 1977 | AddLine("CONT;"); | ||
| 1978 | return {}; | ||
| 1979 | } | ||
| 1980 | |||
| 1981 | std::string ARBDecompiler::PushFlowStack(Operation operation) { | ||
| 1982 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1983 | const u32 target = std::get<ImmediateNode>(*operation[0]).GetValue(); | ||
| 1984 | const std::string_view stack_name = StackName(stack); | ||
| 1985 | AddLine("MOV.U {}[{}_TOP.x].x, {};", stack_name, stack_name, target); | ||
| 1986 | AddLine("ADD.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1987 | return {}; | ||
| 1988 | } | ||
| 1989 | |||
| 1990 | std::string ARBDecompiler::PopFlowStack(Operation operation) { | ||
| 1991 | const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||
| 1992 | const std::string_view stack_name = StackName(stack); | ||
| 1993 | AddLine("SUB.S {}_TOP.x, {}_TOP.x, 1;", stack_name, stack_name); | ||
| 1994 | AddLine("MOV.U PC.x, {}[{}_TOP.x].x;", stack_name, stack_name); | ||
| 1995 | AddLine("CONT;"); | ||
| 1996 | return {}; | ||
| 1997 | } | ||
| 1998 | |||
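Editor's note: PushFlowStack and PopFlowStack above emulate structured control flow with an explicit stack held in shader registers: a push stores a jump target and bumps the top-of-stack register, and a pop reverses that and redirects the emulated program counter. A minimal host-side model of the same semantics, with illustrative names and sizes:

    #include <array>
    #include <cstdint>

    // Mirrors the generated ARB code; the emitted instructions are shown as
    // comments. Stack depth and member names are assumptions for the sketch.
    struct FlowStackModel {
        std::array<std::uint32_t, 20> storage{};
        std::uint32_t top = 0;
        std::uint32_t pc = 0;

        void Push(std::uint32_t target) {
            storage[top] = target; // MOV.U STACK[TOP.x].x, target;
            ++top;                 // ADD.S TOP.x, TOP.x, 1;
        }
        void Pop() {
            --top;                 // SUB.S TOP.x, TOP.x, 1;
            pc = storage[top];     // MOV.U PC.x, STACK[TOP.x].x;
        }
    };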
| 1999 | std::string ARBDecompiler::Exit(Operation) { | ||
| 2000 | Exit(); | ||
| 2001 | return {}; | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | std::string ARBDecompiler::Discard(Operation) { | ||
| 2005 | AddLine("KIL TR;"); | ||
| 2006 | return {}; | ||
| 2007 | } | ||
| 2008 | |||
| 2009 | std::string ARBDecompiler::EmitVertex(Operation) { | ||
| 2010 | AddLine("EMIT;"); | ||
| 2011 | return {}; | ||
| 2012 | } | ||
| 2013 | |||
| 2014 | std::string ARBDecompiler::EndPrimitive(Operation) { | ||
| 2015 | AddLine("ENDPRIM;"); | ||
| 2016 | return {}; | ||
| 2017 | } | ||
| 2018 | |||
| 2019 | std::string ARBDecompiler::InvocationId(Operation) { | ||
| 2020 | return "primitive.invocation"; | ||
| 2021 | } | ||
| 2022 | |||
| 2023 | std::string ARBDecompiler::YNegate(Operation) { | ||
| 2024 | LOG_WARNING(Render_OpenGL, "(STUBBED)"); | ||
| 2025 | const std::string temporary = AllocTemporary(); | ||
| 2026 | AddLine("MOV.F {}, 1;", temporary); | ||
| 2027 | return temporary; | ||
| 2028 | } | ||
| 2029 | |||
| 2030 | std::string ARBDecompiler::ThreadId(Operation) { | ||
| 2031 | return fmt::format("{}.threadid", StageInputName(stage)); | ||
| 2032 | } | ||
| 2033 | |||
| 2034 | std::string ARBDecompiler::ShuffleIndexed(Operation operation) { | ||
| 2035 | if (!device.HasWarpIntrinsics()) { | ||
| 2036 | LOG_ERROR(Render_OpenGL, | ||
| 2037 | "NV_shader_thread_shuffle is missing. Kepler or better is required."); | ||
| 2038 | return Visit(operation[0]); | ||
| 2039 | } | ||
| 2040 | const std::string temporary = AllocVectorTemporary(); | ||
| 2041 | AddLine("SHFIDX.U {}, {}, {}, {{31, 0, 0, 0}};", temporary, Visit(operation[0]), | ||
| 2042 | Visit(operation[1])); | ||
| 2043 | AddLine("MOV.U {}.x, {}.y;", temporary, temporary); | ||
| 2044 | return fmt::format("{}.x", temporary); | ||
| 2045 | } | ||
| 2046 | |||
| 2047 | std::string ARBDecompiler::Barrier(Operation) { | ||
| 2048 | if (!ir.IsDecompiled()) { | ||
| 2049 | LOG_ERROR(Render_OpenGL, "BAR used but shader is not decompiled"); | ||
| 2050 | return {}; | ||
| 2051 | } | ||
| 2052 | AddLine("BAR;"); | ||
| 2053 | return {}; | ||
| 2054 | } | ||
| 2055 | |||
| 2056 | std::string ARBDecompiler::MemoryBarrierGroup(Operation) { | ||
| 2057 | AddLine("MEMBAR.CTA;"); | ||
| 2058 | return {}; | ||
| 2059 | } | ||
| 2060 | |||
| 2061 | std::string ARBDecompiler::MemoryBarrierGlobal(Operation) { | ||
| 2062 | AddLine("MEMBAR;"); | ||
| 2063 | return {}; | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | } // Anonymous namespace | ||
| 2067 | |||
| 2068 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 2069 | const VideoCommon::Shader::Registry& registry, | ||
| 2070 | Tegra::Engines::ShaderType stage, std::string_view identifier) { | ||
| 2071 | return ARBDecompiler(device, ir, registry, stage, identifier).Code(); | ||
| 2072 | } | ||
| 2073 | |||
| 2074 | } // namespace OpenGL | ||
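Editor's note: DecompileAssemblyShader returns the whole program as plain ARB assembly text. A hedged sketch of a call site, using the NV_gpu_program5 and EXT_direct_state_access entry points (the shader cache is the real consumer; the variable names and the vertex-stage target here are assumptions):

    const std::string code = DecompileAssemblyShader(
        device, ir, registry, Tegra::Engines::ShaderType::Vertex, "vertex_shader");

    GLuint program;
    glGenProgramsARB(1, &program);
    glNamedProgramStringEXT(program, GL_VERTEX_PROGRAM_NV, GL_PROGRAM_FORMAT_ASCII_ARB,
                            static_cast<GLsizei>(code.size()), code.data());
    // On failure, the driver reports details through GL_PROGRAM_ERROR_STRING_NV.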
diff --git a/src/video_core/renderer_opengl/gl_arb_decompiler.h b/src/video_core/renderer_opengl/gl_arb_decompiler.h new file mode 100644 index 000000000..6afc87220 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | |||
| 12 | namespace Tegra::Engines { | ||
| 13 | enum class ShaderType : u32; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | class ShaderIR; | ||
| 18 | class Registry; | ||
| 19 | } // namespace VideoCommon::Shader | ||
| 20 | |||
| 21 | namespace OpenGL { | ||
| 22 | |||
| 23 | class Device; | ||
| 24 | |||
| 25 | std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||
| 26 | const VideoCommon::Shader::Registry& registry, | ||
| 27 | Tegra::Engines::ShaderType stage, std::string_view identifier); | ||
| 28 | |||
| 29 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index d2cab50bd..ad0577a4f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | 10 | #include "common/microprofile.h" |
| 11 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 12 | #include "video_core/rasterizer_interface.h" | 13 | #include "video_core/rasterizer_interface.h" |
| 13 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 14 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| @@ -21,13 +22,12 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 21 | 22 | ||
| 22 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 23 | 24 | ||
| 24 | CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) | 25 | Buffer::Buffer(VAddr cpu_addr, const std::size_t size) : VideoCommon::BufferBlock{cpu_addr, size} { |
| 25 | : VideoCommon::BufferBlock{cpu_addr, size} { | ||
| 26 | gl_buffer.Create(); | 26 | gl_buffer.Create(); |
| 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 28 | } | 28 | } |
| 29 | 29 | ||
| 30 | CachedBufferBlock::~CachedBufferBlock() = default; | 30 | Buffer::~Buffer() = default; |
| 31 | 31 | ||
| 32 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 32 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| 33 | const Device& device, std::size_t stream_size) | 33 | const Device& device, std::size_t stream_size) |
| @@ -47,12 +47,8 @@ OGLBufferCache::~OGLBufferCache() { | |||
| 47 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 47 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 50 | std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 51 | return std::make_shared<CachedBufferBlock>(cpu_addr, size); | 51 | return std::make_shared<Buffer>(cpu_addr, size); |
| 52 | } | ||
| 53 | |||
| 54 | GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { | ||
| 55 | return buffer->GetHandle(); | ||
| 56 | } | 52 | } |
| 57 | 53 | ||
| 58 | GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { | 54 | GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { |
| @@ -61,7 +57,7 @@ GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { | |||
| 61 | 57 | ||
| 62 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 58 | void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 63 | const u8* data) { | 59 | const u8* data) { |
| 64 | glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), | 60 | glNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), |
| 65 | static_cast<GLsizeiptr>(size), data); | 61 | static_cast<GLsizeiptr>(size), data); |
| 66 | } | 62 | } |
| 67 | 63 | ||
| @@ -69,20 +65,20 @@ void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 69 | u8* data) { | 65 | u8* data) { |
| 70 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | 66 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); |
| 71 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | 67 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); |
| 72 | glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), | 68 | glGetNamedBufferSubData(buffer.Handle(), static_cast<GLintptr>(offset), |
| 73 | static_cast<GLsizeiptr>(size), data); | 69 | static_cast<GLsizeiptr>(size), data); |
| 74 | } | 70 | } |
| 75 | 71 | ||
| 76 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | 72 | void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 77 | std::size_t dst_offset, std::size_t size) { | 73 | std::size_t dst_offset, std::size_t size) { |
| 78 | glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset), | 74 | glCopyNamedBufferSubData(src.Handle(), dst.Handle(), static_cast<GLintptr>(src_offset), |
| 79 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); | 75 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); |
| 80 | } | 76 | } |
| 81 | 77 | ||
| 82 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, | 78 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, |
| 83 | std::size_t size) { | 79 | std::size_t size) { |
| 84 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); | 80 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); |
| 85 | const GLuint& cbuf = cbufs[cbuf_cursor++]; | 81 | const GLuint cbuf = cbufs[cbuf_cursor++]; |
| 86 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); | 82 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); |
| 87 | return {cbuf, 0}; | 83 | return {cbuf, 0}; |
| 88 | } | 84 | } |
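Editor's note: with CachedBufferBlock folded into Buffer, the cache methods above take the block by const reference and read the GL name directly through Buffer::Handle(), dropping the old ToHandle() indirection. A minimal sketch of the resulting upload path, assuming a GL 4.5 context with direct state access:

    void Upload(const Buffer& buffer, GLintptr offset, GLsizeiptr size, const u8* data) {
        // Equivalent to UploadBlockData above: one DSA call on the block's handle.
        glNamedBufferSubData(buffer.Handle(), offset, size, data);
    }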
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a9e86cfc7..a49aaf9c4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/buffer_cache/buffer_cache.h" | 11 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/rasterizer_cache.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 16 | 15 | ||
| @@ -24,17 +23,12 @@ class Device; | |||
| 24 | class OGLStreamBuffer; | 23 | class OGLStreamBuffer; |
| 25 | class RasterizerOpenGL; | 24 | class RasterizerOpenGL; |
| 26 | 25 | ||
| 27 | class CachedBufferBlock; | 26 | class Buffer : public VideoCommon::BufferBlock { |
| 28 | |||
| 29 | using Buffer = std::shared_ptr<CachedBufferBlock>; | ||
| 30 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | ||
| 31 | |||
| 32 | class CachedBufferBlock : public VideoCommon::BufferBlock { | ||
| 33 | public: | 27 | public: |
| 34 | explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); | 28 | explicit Buffer(VAddr cpu_addr, const std::size_t size); |
| 35 | ~CachedBufferBlock(); | 29 | ~Buffer(); |
| 36 | 30 | ||
| 37 | GLuint GetHandle() const { | 31 | GLuint Handle() const { |
| 38 | return gl_buffer.handle; | 32 | return gl_buffer.handle; |
| 39 | } | 33 | } |
| 40 | 34 | ||
| @@ -42,6 +36,7 @@ private: | |||
| 42 | OGLBuffer gl_buffer; | 36 | OGLBuffer gl_buffer; |
| 43 | }; | 37 | }; |
| 44 | 38 | ||
| 39 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | ||
| 45 | class OGLBufferCache final : public GenericBufferCache { | 40 | class OGLBufferCache final : public GenericBufferCache { |
| 46 | public: | 41 | public: |
| 47 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, | 42 | explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -55,9 +50,7 @@ public: | |||
| 55 | } | 50 | } |
| 56 | 51 | ||
| 57 | protected: | 52 | protected: |
| 58 | Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; | 53 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 59 | |||
| 60 | GLuint ToHandle(const Buffer& buffer) override; | ||
| 61 | 54 | ||
| 62 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 55 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 63 | const u8* data) override; | 56 | const u8* data) override; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index d83dca25a..e245e27ec 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <cstring> | 8 | #include <cstring> |
| 9 | #include <limits> | ||
| 9 | #include <optional> | 10 | #include <optional> |
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | 12 | ||
| @@ -13,6 +14,7 @@ | |||
| 13 | 14 | ||
| 14 | #include "common/logging/log.h" | 15 | #include "common/logging/log.h" |
| 15 | #include "common/scope_exit.h" | 16 | #include "common/scope_exit.h" |
| 17 | #include "core/settings.h" | ||
| 16 | #include "video_core/renderer_opengl/gl_device.h" | 18 | #include "video_core/renderer_opengl/gl_device.h" |
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 19 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 18 | 20 | ||
| @@ -25,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1; | |||
| 25 | 27 | ||
| 26 | constexpr u32 NumStages = 5; | 28 | constexpr u32 NumStages = 5; |
| 27 | 29 | ||
| 28 | constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, | 30 | constexpr std::array LimitUBOs = { |
| 29 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, | 31 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, |
| 30 | GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; | 32 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, |
| 33 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; | ||
| 31 | 34 | ||
| 32 | constexpr std::array LimitSSBOs = { | 35 | constexpr std::array LimitSSBOs = { |
| 33 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, | 36 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, |
| 34 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, | 37 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, |
| 35 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; | 38 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; |
| 36 | 39 | ||
| 37 | constexpr std::array LimitSamplers = { | 40 | constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, |
| 38 | GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, | 41 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, |
| 39 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, | 42 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, |
| 40 | GL_MAX_TEXTURE_IMAGE_UNITS}; | 43 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, |
| 44 | GL_MAX_TEXTURE_IMAGE_UNITS, | ||
| 45 | GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; | ||
| 41 | 46 | ||
| 42 | constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, | 47 | constexpr std::array LimitImages = { |
| 43 | GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, | 48 | GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, |
| 44 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, | 49 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, |
| 45 | GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; | 50 | GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; |
| 46 | 51 | ||
| 47 | template <typename T> | 52 | template <typename T> |
| 48 | T GetInteger(GLenum pname) { | 53 | T GetInteger(GLenum pname) { |
| @@ -84,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | |||
| 84 | return std::exchange(base, base + amount); | 89 | return std::exchange(base, base + amount); |
| 85 | } | 90 | } |
| 86 | 91 | ||
| 92 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { | ||
| 93 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; | ||
| 94 | std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), | ||
| 95 | [](GLenum pname) { return GetInteger<u32>(pname); }); | ||
| 96 | return max; | ||
| 97 | } | ||
| 98 | |||
| 87 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { | 99 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { |
| 88 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; | 100 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; |
| 89 | 101 | ||
| @@ -132,6 +144,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 132 | } | 144 | } |
| 133 | 145 | ||
| 134 | bool IsASTCSupported() { | 146 | bool IsASTCSupported() { |
| 147 | static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; | ||
| 135 | static constexpr std::array formats = { | 148 | static constexpr std::array formats = { |
| 136 | GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, | 149 | GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, |
| 137 | GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, | 150 | GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, |
| @@ -148,25 +161,43 @@ bool IsASTCSupported() { | |||
| 148 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, | 161 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, |
| 149 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, | 162 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, |
| 150 | }; | 163 | }; |
| 151 | return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) { | 164 | static constexpr std::array required_support = { |
| 152 | GLint supported; | 165 | GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, |
| 153 | glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, | 166 | GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, |
| 154 | &supported); | 167 | }; |
| 155 | return supported == GL_TRUE; | 168 | |
| 156 | }) == formats.end(); | 169 | for (const GLenum target : targets) { |
| 170 | for (const GLenum format : formats) { | ||
| 171 | for (const GLenum support : required_support) { | ||
| 172 | GLint value; | ||
| 173 (ed.) | glGetInternalformativ(target, format, support, 1, &value); | ||
| 174 | if (value != GL_FULL_SUPPORT) { | ||
| 175 | return false; | ||
| 176 | } | ||
| 177 | } | ||
| 178 | } | ||
| 179 | } | ||
| 180 | return true; | ||
| 157 | } | 181 | } |
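Editor's note: the rewritten check is deliberately strict: every ASTC format must report GL_FULL_SUPPORT for every target and shader-stage usage, so a single partially supported combination disables host ASTC entirely and the emulator can fall back to its software decoder. The per-combination predicate, factored out as a sketch:

    bool IsFullySupported(GLenum target, GLenum format, GLenum usage) {
        GLint value;
        glGetInternalformativ(target, format, usage, 1, &value);
        return value == GL_FULL_SUPPORT;
    }

Also note the fix folded into the inner loop above: the original hunk queried GL_TEXTURE_2D unconditionally, leaving the `target` loop variable unused; the query must use `target` for the 2D-array case to be tested at all.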
| 158 | 182 | ||
| 159 | } // Anonymous namespace | 183 | } // Anonymous namespace |
| 160 | 184 | ||
| 161 | Device::Device() : base_bindings{BuildBaseBindings()} { | 185 | Device::Device() |
| 186 | : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { | ||
| 162 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); | 187 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); |
| 163 | const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); | 188 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); |
| 164 | const std::vector extensions = GetExtensions(); | 189 | const std::vector extensions = GetExtensions(); |
| 165 | 190 | ||
| 166 | const bool is_nvidia = vendor == "NVIDIA Corporation"; | 191 | const bool is_nvidia = vendor == "NVIDIA Corporation"; |
| 167 | const bool is_amd = vendor == "ATI Technologies Inc."; | 192 | const bool is_amd = vendor == "ATI Technologies Inc."; |
| 168 | const bool is_intel = vendor == "Intel"; | 193 | |
| 169 | const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr; | 194 | bool disable_fast_buffer_sub_data = false; |
| 195 | if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { | ||
| 196 | LOG_WARNING( | ||
| 197 | Render_OpenGL, | ||
| 198 | "Beta driver 443.24 is known to have issues. There might be performance issues."); | ||
| 199 | disable_fast_buffer_sub_data = true; | ||
| 200 | } | ||
| 170 | 201 | ||
| 171 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 202 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 172 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 203 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| @@ -181,16 +212,25 @@ Device::Device() : base_bindings{BuildBaseBindings()} { | |||
| 181 | has_variable_aoffi = TestVariableAoffi(); | 212 | has_variable_aoffi = TestVariableAoffi(); |
| 182 | has_component_indexing_bug = is_amd; | 213 | has_component_indexing_bug = is_amd; |
| 183 | has_precise_bug = TestPreciseBug(); | 214 | has_precise_bug = TestPreciseBug(); |
| 184 | has_broken_compute = is_intel_proprietary; | 215 | has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; |
| 185 | has_fast_buffer_sub_data = is_nvidia; | 216 | has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; |
| 217 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | ||
| 218 | GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && | ||
| 219 | GLAD_GL_NV_transform_feedback2; | ||
| 186 | 220 | ||
| 187 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); | 221 | LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); |
| 188 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); | 222 | LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); |
| 189 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); | 223 | LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); |
| 224 | |||
| 225 | if (Settings::values.use_assembly_shaders && !use_assembly_shaders) { | ||
| 226 | LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported"); | ||
| 227 | } | ||
| 190 | } | 228 | } |
| 191 | 229 | ||
| 192 | Device::Device(std::nullptr_t) { | 230 | Device::Device(std::nullptr_t) { |
| 193 | uniform_buffer_alignment = 0; | 231 | max_uniform_buffers.fill(std::numeric_limits<u32>::max()); |
| 232 | uniform_buffer_alignment = 4; | ||
| 233 | shader_storage_alignment = 4; | ||
| 194 | max_vertex_attributes = 16; | 234 | max_vertex_attributes = 16; |
| 195 | max_varyings = 15; | 235 | max_varyings = 15; |
| 196 | has_warp_intrinsics = true; | 236 | has_warp_intrinsics = true; |
| @@ -198,9 +238,6 @@ Device::Device(std::nullptr_t) { | |||
| 198 | has_vertex_viewport_layer = true; | 238 | has_vertex_viewport_layer = true; |
| 199 | has_image_load_formatted = true; | 239 | has_image_load_formatted = true; |
| 200 | has_variable_aoffi = true; | 240 | has_variable_aoffi = true; |
| 201 | has_component_indexing_bug = false; | ||
| 202 | has_broken_compute = false; | ||
| 203 | has_precise_bug = false; | ||
| 204 | } | 241 | } |
| 205 | 242 | ||
| 206 | bool Device::TestVariableAoffi() { | 243 | bool Device::TestVariableAoffi() { |
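Editor's note: the constructor changes above gate the new assembly-shader path behind both the user setting and the full set of required NVIDIA extensions. The predicate, isolated as a standalone sketch (the GLAD_GL_NV_* flags are the extension booleans glad populates at load time):

    bool CanUseAssemblyShaders(bool user_opt_in) {
        return user_opt_in && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 &&
               GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2;
    }

When the setting is on but an extension is missing, the constructor logs an error and silently falls back to GLSL, as shown in the hunk above.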
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index a55050cb5..145347943 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -24,6 +24,10 @@ public: | |||
| 24 | explicit Device(); | 24 | explicit Device(); |
| 25 | explicit Device(std::nullptr_t); | 25 | explicit Device(std::nullptr_t); |
| 26 | 26 | ||
| 27 | u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { | ||
| 28 | return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; | ||
| 29 | } | ||
| 30 | |||
| 27 | const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { | 31 | const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { |
| 28 | return base_bindings[stage_index]; | 32 | return base_bindings[stage_index]; |
| 29 | } | 33 | } |
| @@ -80,19 +84,24 @@ public: | |||
| 80 | return has_precise_bug; | 84 | return has_precise_bug; |
| 81 | } | 85 | } |
| 82 | 86 | ||
| 83 | bool HasBrokenCompute() const { | ||
| 84 | return has_broken_compute; | ||
| 85 | } | ||
| 86 | |||
| 87 | bool HasFastBufferSubData() const { | 87 | bool HasFastBufferSubData() const { |
| 88 | return has_fast_buffer_sub_data; | 88 | return has_fast_buffer_sub_data; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | bool HasNvViewportArray2() const { | ||
| 92 | return has_nv_viewport_array2; | ||
| 93 | } | ||
| 94 | |||
| 95 | bool UseAssemblyShaders() const { | ||
| 96 | return use_assembly_shaders; | ||
| 97 | } | ||
| 98 | |||
| 91 | private: | 99 | private: |
| 92 | static bool TestVariableAoffi(); | 100 | static bool TestVariableAoffi(); |
| 93 | static bool TestPreciseBug(); | 101 | static bool TestPreciseBug(); |
| 94 | 102 | ||
| 95 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings; | 103 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; |
| 104 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; | ||
| 96 | std::size_t uniform_buffer_alignment{}; | 105 | std::size_t uniform_buffer_alignment{}; |
| 97 | std::size_t shader_storage_alignment{}; | 106 | std::size_t shader_storage_alignment{}; |
| 98 | u32 max_vertex_attributes{}; | 107 | u32 max_vertex_attributes{}; |
| @@ -105,8 +114,9 @@ private: | |||
| 105 | bool has_variable_aoffi{}; | 114 | bool has_variable_aoffi{}; |
| 106 | bool has_component_indexing_bug{}; | 115 | bool has_component_indexing_bug{}; |
| 107 | bool has_precise_bug{}; | 116 | bool has_precise_bug{}; |
| 108 | bool has_broken_compute{}; | ||
| 109 | bool has_fast_buffer_sub_data{}; | 117 | bool has_fast_buffer_sub_data{}; |
| 118 | bool has_nv_viewport_array2{}; | ||
| 119 | bool use_assembly_shaders{}; | ||
| 110 | }; | 120 | }; |
| 111 | 121 | ||
| 112 | } // namespace OpenGL | 122 | } // namespace OpenGL |
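Editor's note: a short usage sketch for the new per-stage query; clamping the guest's 18 constant buffers against the host limit is an assumption about how a caller would use it, not code from this changeset:

    #include <algorithm>

    const u32 host_limit = device.GetMaxUniformBuffers(Tegra::Engines::ShaderType::Fragment);
    const u32 num_bindable = std::min<u32>(18u, host_limit);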
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 99ddcb3f8..ec5421afa 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | 6 | ||
| 7 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 7 | #include "video_core/renderer_opengl/gl_fence_manager.h" | 8 | #include "video_core/renderer_opengl/gl_fence_manager.h" |
| 8 | 9 | ||
| 9 | namespace OpenGL { | 10 | namespace OpenGL { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 69dcf952f..2d6c11320 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 32 | #include "video_core/renderer_opengl/renderer_opengl.h" | 32 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 33 | #include "video_core/shader_cache.h" | ||
| 33 | 34 | ||
| 34 | namespace OpenGL { | 35 | namespace OpenGL { |
| 35 | 36 | ||
| @@ -54,15 +55,33 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 | |||
| 54 | 55 | ||
| 55 | namespace { | 56 | namespace { |
| 56 | 57 | ||
| 58 | constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; | ||
| 59 | constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = | ||
| 60 | NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; | ||
| 61 | constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = | ||
| 62 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; | ||
| 63 | |||
| 57 | constexpr std::size_t NumSupportedVertexAttributes = 16; | 64 | constexpr std::size_t NumSupportedVertexAttributes = 16; |
| 58 | 65 | ||
| 59 | template <typename Engine, typename Entry> | 66 | template <typename Engine, typename Entry> |
| 60 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 67 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| 61 | ShaderType shader_type, std::size_t index = 0) { | 68 | ShaderType shader_type, std::size_t index = 0) { |
| 69 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 70 | if (entry.is_separated) { | ||
| 71 | const u32 buffer_1 = entry.buffer; | ||
| 72 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 73 | const u32 offset_1 = entry.offset; | ||
| 74 | const u32 offset_2 = entry.secondary_offset; | ||
| 75 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 76 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 77 | return engine.GetTextureInfo(handle_1 | handle_2); | ||
| 78 | } | ||
| 79 | } | ||
| 62 | if (entry.is_bindless) { | 80 | if (entry.is_bindless) { |
| 63 | const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | 81 | const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |
| 64 | return engine.GetTextureInfo(tex_handle); | 82 | return engine.GetTextureInfo(handle); |
| 65 | } | 83 | } |
| 84 | |||
| 66 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | 85 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); |
| 67 | const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | 86 | const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); |
| 68 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | 87 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { |
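Editor's note: the separated-sampler path added above rebuilds one packed handle from two constant buffer reads. Because the texture and sampler words occupy disjoint bit ranges, a bitwise OR reconstitutes the handle; a reduced compile-time sketch (the specific bit values are illustrative only):

    #include <cstdint>

    constexpr std::uint32_t texture_word = 0x0000'0123; // TIC index in the low bits
    constexpr std::uint32_t sampler_word = 0x4560'0000; // TSC index in the high bits
    constexpr std::uint32_t handle = texture_word | sampler_word;
    static_assert(handle == 0x4560'0123);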
| @@ -87,6 +106,34 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | |||
| 87 | return buffer.size; | 106 | return buffer.size; |
| 88 | } | 107 | } |
| 89 | 108 | ||
| 109 | /// Translates hardware transform feedback indices | ||
| 110 | /// @param location Hardware location, in 32-bit components | ||
| 111 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments | ||
| 112 | /// @note See https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_transform_feedback3.txt | ||
| 113 | std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) { | ||
| 114 | const u8 index = location / 4; | ||
| 115 | if (index >= 8 && index <= 39) { | ||
| 116 | return {GL_GENERIC_ATTRIB_NV, index - 8}; | ||
| 117 | } | ||
| 118 | if (index >= 48 && index <= 55) { | ||
| 119 | return {GL_TEXTURE_COORD_NV, index - 48}; | ||
| 120 | } | ||
| 121 | switch (index) { | ||
| 122 | case 7: | ||
| 123 | return {GL_POSITION, 0}; | ||
| 124 | case 40: | ||
| 125 | return {GL_PRIMARY_COLOR_NV, 0}; | ||
| 126 | case 41: | ||
| 127 | return {GL_SECONDARY_COLOR_NV, 0}; | ||
| 128 | case 42: | ||
| 129 | return {GL_BACK_PRIMARY_COLOR_NV, 0}; | ||
| 130 | case 43: | ||
| 131 | return {GL_BACK_SECONDARY_COLOR_NV, 0}; | ||
| 132 | } | ||
| 133 | UNIMPLEMENTED_MSG("index={}", static_cast<int>(index)); | ||
| 134 | return {GL_POSITION, 0}; | ||
| 135 | } | ||
| 136 | |||
| 90 | void oglEnable(GLenum cap, bool state) { | 137 | void oglEnable(GLenum cap, bool state) { |
| 91 | (state ? glEnable : glDisable)(cap); | 138 | (state ? glEnable : glDisable)(cap); |
| 92 | } | 139 | } |
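Editor's note: a few spot checks for TransformFeedbackEnum above. Locations are in 32-bit components, so dividing by 4 yields the attribute index; the inputs below are chosen for illustration and assume the function and GL headers are in scope:

    #include <cassert>
    #include <utility>

    void TestTransformFeedbackEnum() {
        assert(TransformFeedbackEnum(28) == std::make_pair(GLint{GL_POSITION}, GLint{0}));           // index 7
        assert(TransformFeedbackEnum(32) == std::make_pair(GLint{GL_GENERIC_ATTRIB_NV}, GLint{0}));  // index 8
        assert(TransformFeedbackEnum(192) == std::make_pair(GLint{GL_TEXTURE_COORD_NV}, GLint{0}));  // index 48
    }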
| @@ -94,17 +141,33 @@ void oglEnable(GLenum cap, bool state) { | |||
| 94 | } // Anonymous namespace | 141 | } // Anonymous namespace |
| 95 | 142 | ||
| 96 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 143 | RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 97 | ScreenInfo& info, GLShader::ProgramManager& program_manager, | 144 | const Device& device, ScreenInfo& info, |
| 98 | StateTracker& state_tracker) | 145 | ProgramManager& program_manager, StateTracker& state_tracker) |
| 99 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, | 146 | : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device, |
| 147 | state_tracker}, | ||
| 100 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, | 148 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, |
| 101 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, | 149 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, |
| 102 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, | 150 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, |
| 103 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { | 151 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { |
| 104 | CheckExtensions(); | 152 | CheckExtensions(); |
| 153 | |||
| 154 | unified_uniform_buffer.Create(); | ||
| 155 | glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); | ||
| 156 | |||
| 157 | if (device.UseAssemblyShaders()) { | ||
| 158 | glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 159 | for (const GLuint cbuf : staging_cbufs) { | ||
| 160 | glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize), | ||
| 161 | nullptr, 0); | ||
| 162 | } | ||
| 163 | } | ||
| 105 | } | 164 | } |
| 106 | 165 | ||
| 107 | RasterizerOpenGL::~RasterizerOpenGL() {} | 166 | RasterizerOpenGL::~RasterizerOpenGL() { |
| 167 | if (device.UseAssemblyShaders()) { | ||
| 168 | glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 169 | } | ||
| 170 | } | ||
| 108 | 171 | ||
| 109 | void RasterizerOpenGL::CheckExtensions() { | 172 | void RasterizerOpenGL::CheckExtensions() { |
| 110 | if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { | 173 | if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { |
| @@ -230,6 +293,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { | |||
| 230 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | 293 | void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { |
| 231 | MICROPROFILE_SCOPE(OpenGL_Shader); | 294 | MICROPROFILE_SCOPE(OpenGL_Shader); |
| 232 | auto& gpu = system.GPU().Maxwell3D(); | 295 | auto& gpu = system.GPU().Maxwell3D(); |
| 296 | std::size_t num_ssbos = 0; | ||
| 233 | u32 clip_distances = 0; | 297 | u32 clip_distances = 0; |
| 234 | 298 | ||
| 235 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 299 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| @@ -259,7 +323,15 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 259 | continue; | 323 | continue; |
| 260 | } | 324 | } |
| 261 | 325 | ||
| 262 | Shader shader{shader_cache.GetStageProgram(program)}; | 326 | Shader* const shader = shader_cache.GetStageProgram(program); |
| 327 | |||
| 328 | if (device.UseAssemblyShaders()) { | ||
| 329 | // Check for an ARB limitation: only 16 SSBOs are available per context state. To work | ||
| 330 | // around this, all stages share the same bindings. | ||
| 331 | const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size(); | ||
| 332 | ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage"); | ||
| 333 | num_ssbos += num_stage_ssbos; | ||
| 334 | } | ||
| 263 | 335 | ||
| 264 | // Stage indices are 0 - 5 | 336 | // Stage indices are 0 - 5 |
| 265 | const std::size_t stage = index == 0 ? 0 : index - 1; | 337 | const std::size_t stage = index == 0 ? 0 : index - 1; |
| @@ -526,6 +598,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 526 | SyncFramebufferSRGB(); | 598 | SyncFramebufferSRGB(); |
| 527 | 599 | ||
| 528 | buffer_cache.Acquire(); | 600 | buffer_cache.Acquire(); |
| 601 | current_cbuf = 0; | ||
| 529 | 602 | ||
| 530 | std::size_t buffer_size = CalculateVertexArraysSize(); | 603 | std::size_t buffer_size = CalculateVertexArraysSize(); |
| 531 | 604 | ||
| @@ -535,16 +608,25 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 535 | } | 608 | } |
| 536 | 609 | ||
| 537 | // Uniform space for the 5 shader stages | 610 | // Uniform space for the 5 shader stages |
| 538 | buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + | 611 | buffer_size = |
| 539 | (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * | 612 | Common::AlignUp<std::size_t>(buffer_size, 4) + |
| 540 | Maxwell::MaxShaderStage; | 613 | (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage; |
| 541 | 614 | ||
| 542 | // Add space for at least 18 constant buffers | 615 | // Add space for at least 18 constant buffers |
| 543 | buffer_size += Maxwell::MaxConstBuffers * | 616 | buffer_size += Maxwell::MaxConstBuffers * |
| 544 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 617 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 545 | 618 | ||
| 546 | // Prepare the vertex array. | 619 | // Prepare the vertex array. |
| 547 | buffer_cache.Map(buffer_size); | 620 | const bool invalidated = buffer_cache.Map(buffer_size); |
| 621 | |||
| 622 | if (invalidated) { | ||
| 623 | // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty | ||
| 624 | auto& dirty = gpu.dirty.flags; | ||
| 625 | dirty[Dirty::VertexBuffers] = true; | ||
| 626 | for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||
| 627 | dirty[index] = true; | ||
| 628 | } | ||
| 629 | } | ||
| 548 | 630 | ||
| 549 | // Prepare vertex array format. | 631 | // Prepare vertex array format. |
| 550 | SetupVertexFormat(); | 632 | SetupVertexFormat(); |
| @@ -558,12 +640,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 558 | } | 640 | } |
| 559 | 641 | ||
| 560 | // Setup emulation uniform buffer. | 642 | // Setup emulation uniform buffer. |
| 561 | GLShader::MaxwellUniformData ubo; | 643 | if (!device.UseAssemblyShaders()) { |
| 562 | ubo.SetFromRegs(gpu); | 644 | MaxwellUniformData ubo; |
| 563 | const auto [buffer, offset] = | 645 | ubo.SetFromRegs(gpu); |
| 564 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | 646 | const auto [buffer, offset] = |
| 565 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, | 647 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); |
| 566 | static_cast<GLsizeiptr>(sizeof(ubo))); | 648 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, |
| 649 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 650 | } | ||
| 567 | 651 | ||
| 568 | // Setup shaders and their used resources. | 652 | // Setup shaders and their used resources. |
| 569 | texture_cache.GuardSamplers(true); | 653 | texture_cache.GuardSamplers(true); |
| @@ -630,16 +714,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 630 | } | 714 | } |
| 631 | 715 | ||
| 632 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 716 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| 633 | if (device.HasBrokenCompute()) { | ||
| 634 | return; | ||
| 635 | } | ||
| 636 | |||
| 637 | buffer_cache.Acquire(); | 717 | buffer_cache.Acquire(); |
| 718 | current_cbuf = 0; | ||
| 638 | 719 | ||
| 639 | auto kernel = shader_cache.GetComputeKernel(code_addr); | 720 | auto kernel = shader_cache.GetComputeKernel(code_addr); |
| 640 | SetupComputeTextures(kernel); | 721 | SetupComputeTextures(kernel); |
| 641 | SetupComputeImages(kernel); | 722 | SetupComputeImages(kernel); |
| 642 | program_manager.BindComputeShader(kernel->GetHandle()); | ||
| 643 | 723 | ||
| 644 | const std::size_t buffer_size = | 724 | const std::size_t buffer_size = |
| 645 | Tegra::Engines::KeplerCompute::NumConstBuffers * | 725 | Tegra::Engines::KeplerCompute::NumConstBuffers * |
| @@ -652,6 +732,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | |||
| 652 | buffer_cache.Unmap(); | 732 | buffer_cache.Unmap(); |
| 653 | 733 | ||
| 654 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 734 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 735 | program_manager.BindCompute(kernel->GetHandle()); | ||
| 655 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 736 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| 656 | ++num_queued_commands; | 737 | ++num_queued_commands; |
| 657 | } | 738 | } |
| @@ -701,15 +782,15 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 701 | return; | 782 | return; |
| 702 | } | 783 | } |
| 703 | texture_cache.OnCPUWrite(addr, size); | 784 | texture_cache.OnCPUWrite(addr, size); |
| 704 | shader_cache.InvalidateRegion(addr, size); | 785 | shader_cache.OnCPUWrite(addr, size); |
| 705 | buffer_cache.OnCPUWrite(addr, size); | 786 | buffer_cache.OnCPUWrite(addr, size); |
| 706 | query_cache.InvalidateRegion(addr, size); | ||
| 707 | } | 787 | } |
| 708 | 788 | ||
| 709 | void RasterizerOpenGL::SyncGuestHost() { | 789 | void RasterizerOpenGL::SyncGuestHost() { |
| 710 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 790 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 711 | texture_cache.SyncGuestHost(); | 791 | texture_cache.SyncGuestHost(); |
| 712 | buffer_cache.SyncGuestHost(); | 792 | buffer_cache.SyncGuestHost(); |
| 793 | shader_cache.SyncGuestHost(); | ||
| 713 | } | 794 | } |
| 714 | 795 | ||
| 715 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | 796 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { |
| @@ -811,40 +892,73 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 811 | return true; | 892 | return true; |
| 812 | } | 893 | } |
| 813 | 894 | ||
| 814 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { | 895 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { |
| 896 | static constexpr std::array PARAMETER_LUT = { | ||
| 897 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 898 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 899 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; | ||
| 900 | |||
| 815 | MICROPROFILE_SCOPE(OpenGL_UBO); | 901 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 816 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; | 902 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 817 | const auto& shader_stage = stages[stage_index]; | 903 | const auto& shader_stage = stages[stage_index]; |
| 904 | const auto& entries = shader->GetEntries(); | ||
| 905 | const bool use_unified = entries.use_unified_uniforms; | ||
| 906 | const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; | ||
| 818 | 907 | ||
| 819 | u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; | 908 | const auto base_bindings = device.GetBaseBindings(stage_index); |
| 820 | for (const auto& entry : shader->GetEntries().const_buffers) { | 909 | u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer; |
| 821 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | 910 | for (const auto& entry : entries.const_buffers) { |
| 822 | SetupConstBuffer(binding++, buffer, entry); | 911 | const u32 index = entry.GetIndex(); |
| 912 | const auto& buffer = shader_stage.const_buffers[index]; | ||
| 913 | SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified, | ||
| 914 | base_unified_offset + index * Maxwell::MaxConstBufferSize); | ||
| 915 | ++binding; | ||
| 916 | } | ||
| 917 | if (use_unified) { | ||
| 918 | const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer + | ||
| 919 | entries.global_memory_entries.size()); | ||
| 920 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, | ||
| 921 | base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||
| 823 | } | 922 | } |
| 824 | } | 923 | } |
| 825 | 924 | ||
| 826 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | 925 | void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { |
| 827 | MICROPROFILE_SCOPE(OpenGL_UBO); | 926 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 828 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 927 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 928 | const auto& entries = kernel->GetEntries(); | ||
| 929 | const bool use_unified = entries.use_unified_uniforms; | ||
| 829 | 930 | ||
| 830 | u32 binding = 0; | 931 | u32 binding = 0; |
| 831 | for (const auto& entry : kernel->GetEntries().const_buffers) { | 932 | for (const auto& entry : entries.const_buffers) { |
| 832 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | 933 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; |
| 833 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | 934 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); |
| 834 | Tegra::Engines::ConstBufferInfo buffer; | 935 | Tegra::Engines::ConstBufferInfo buffer; |
| 835 | buffer.address = config.Address(); | 936 | buffer.address = config.Address(); |
| 836 | buffer.size = config.size; | 937 | buffer.size = config.size; |
| 837 | buffer.enabled = mask[entry.GetIndex()]; | 938 | buffer.enabled = mask[entry.GetIndex()]; |
| 838 | SetupConstBuffer(binding++, buffer, entry); | 939 | SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry, |
| 940 | use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); | ||
| 941 | ++binding; | ||
| 942 | } | ||
| 943 | if (use_unified) { | ||
| 944 | const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size()); | ||
| 945 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0, | ||
| 946 | NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||
| 839 | } | 947 | } |
| 840 | } | 948 | } |
| 841 | 949 | ||
| 842 | void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 950 | void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, |
| 843 | const ConstBufferEntry& entry) { | 951 | const Tegra::Engines::ConstBufferInfo& buffer, |
| 952 | const ConstBufferEntry& entry, bool use_unified, | ||
| 953 | std::size_t unified_offset) { | ||
| 844 | if (!buffer.enabled) { | 954 | if (!buffer.enabled) { |
| 845 | // Set values to zero to unbind buffers | 955 | // Set values to zero to unbind buffers |
| 846 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, | 956 | if (device.UseAssemblyShaders()) { |
| 847 | sizeof(float)); | 957 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); |
| 958 | } else { | ||
| 959 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | ||
| 960 | buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); | ||
| 961 | } | ||
| 848 | return; | 962 | return; |
| 849 | } | 963 | } |
| 850 | 964 | ||
| @@ -852,18 +966,38 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const | |||
| 852 | // UBO alignment requirements. | 966 | // UBO alignment requirements. |
| 853 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); | 967 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 854 | 968 | ||
| 855 | const auto alignment = device.GetUniformBufferAlignment(); | 969 | const bool fast_upload = !use_unified && device.HasFastBufferSubData(); |
| 856 | const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, | 970 | |
| 857 | device.HasFastBufferSubData()); | 971 | const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); |
| 858 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | 972 | const GPUVAddr gpu_addr = buffer.address; |
| 973 | auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); | ||
| 974 | |||
| 975 | if (device.UseAssemblyShaders()) { | ||
| 976 | UNIMPLEMENTED_IF(use_unified); | ||
| 977 | if (offset != 0) { | ||
| 978 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | ||
| 979 | glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); | ||
| 980 | cbuf = staging_cbuf; | ||
| 981 | offset = 0; | ||
| 982 | } | ||
| 983 | glBindBufferRangeNV(stage, binding, cbuf, offset, size); | ||
| 984 | return; | ||
| 985 | } | ||
| 986 | |||
| 987 | if (use_unified) { | ||
| 988 | glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); | ||
| 989 | } else { | ||
| 990 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | ||
| 991 | } | ||
| 859 | } | 992 | } |
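Editor's note: the unified path above copies each constant buffer into one large SSBO at a fixed per-stage, per-slot offset, matching the NUM_CONST_BUFFERS_* constants defined near the top of this file. The offset math, reduced to a compile-time sketch (the 64 KiB value for Maxwell::MaxConstBufferSize is an assumption for illustration):

    #include <cstddef>

    constexpr std::size_t kMaxConstBufferSize = 0x10000; // illustrative
    constexpr std::size_t kBuffersPerStage = 18;
    constexpr std::size_t kBytesPerStage = kBuffersPerStage * kMaxConstBufferSize;

    constexpr std::size_t UnifiedOffset(std::size_t stage_index, std::size_t cbuf_index) {
        return stage_index * kBytesPerStage + cbuf_index * kMaxConstBufferSize;
    }
    static_assert(UnifiedOffset(1, 2) == kBytesPerStage + 2 * kMaxConstBufferSize);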
| 860 | 993 | ||
| 861 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { | 994 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { |
| 862 | auto& gpu{system.GPU()}; | 995 | auto& gpu{system.GPU()}; |
| 863 | auto& memory_manager{gpu.MemoryManager()}; | 996 | auto& memory_manager{gpu.MemoryManager()}; |
| 864 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 997 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| 865 | 998 | ||
| 866 | u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; | 999 | u32 binding = |
| 1000 | device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | ||
| 867 | for (const auto& entry : shader->GetEntries().global_memory_entries) { | 1001 | for (const auto& entry : shader->GetEntries().global_memory_entries) { |
| 868 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | 1002 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; |
| 869 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; | 1003 | const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; |
| @@ -872,7 +1006,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad | |||
| 872 | } | 1006 | } |
| 873 | } | 1007 | } |
| 874 | 1008 | ||
| 875 | void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | 1009 | void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { |
| 876 | auto& gpu{system.GPU()}; | 1010 | auto& gpu{system.GPU()}; |
| 877 | auto& memory_manager{gpu.MemoryManager()}; | 1011 | auto& memory_manager{gpu.MemoryManager()}; |
| 878 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | 1012 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; |
| @@ -895,7 +1029,7 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e | |||
| 895 | static_cast<GLsizeiptr>(size)); | 1029 | static_cast<GLsizeiptr>(size)); |
| 896 | } | 1030 | } |
| 897 | 1031 | ||
| 898 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { | 1032 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { |
| 899 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1033 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 900 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 1034 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 901 | u32 binding = device.GetBaseBindings(stage_index).sampler; | 1035 | u32 binding = device.GetBaseBindings(stage_index).sampler; |
| @@ -908,7 +1042,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& | |||
| 908 | } | 1042 | } |
| 909 | } | 1043 | } |
| 910 | 1044 | ||
| 911 | void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | 1045 | void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { |
| 912 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1046 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 913 | const auto& compute = system.GPU().KeplerCompute(); | 1047 | const auto& compute = system.GPU().KeplerCompute(); |
| 914 | u32 binding = 0; | 1048 | u32 binding = 0; |
| @@ -929,19 +1063,15 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu | |||
| 929 | glBindTextureUnit(binding, 0); | 1063 | glBindTextureUnit(binding, 0); |
| 930 | return; | 1064 | return; |
| 931 | } | 1065 | } |
| 932 | glBindTextureUnit(binding, view->GetTexture()); | 1066 | const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, |
| 933 | 1067 | texture.tic.z_source, texture.tic.w_source); | |
| 934 | if (view->GetSurfaceParams().IsBuffer()) { | 1068 | glBindTextureUnit(binding, handle); |
| 935 | return; | 1069 | if (!view->GetSurfaceParams().IsBuffer()) { |
| 1070 | glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); | ||
| 936 | } | 1071 | } |
| 937 | // Apply swizzle to textures that are not buffers. | ||
| 938 | view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | ||
| 939 | texture.tic.w_source); | ||
| 940 | |||
| 941 | glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); | ||
| 942 | } | 1072 | } |
| 943 | 1073 | ||
| 944 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { | 1074 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { |
| 945 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 1075 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 946 | u32 binding = device.GetBaseBindings(stage_index).image; | 1076 | u32 binding = device.GetBaseBindings(stage_index).image; |
| 947 | for (const auto& entry : shader->GetEntries().images) { | 1077 | for (const auto& entry : shader->GetEntries().images) { |
| @@ -951,7 +1081,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh | |||
| 951 | } | 1081 | } |
| 952 | } | 1082 | } |
| 953 | 1083 | ||
| 954 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { | 1084 | void RasterizerOpenGL::SetupComputeImages(Shader* shader) { |
| 955 | const auto& compute = system.GPU().KeplerCompute(); | 1085 | const auto& compute = system.GPU().KeplerCompute(); |
| 956 | u32 binding = 0; | 1086 | u32 binding = 0; |
| 957 | for (const auto& entry : shader->GetEntries().images) { | 1087 | for (const auto& entry : shader->GetEntries().images) { |
| @@ -967,14 +1097,11 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t | |||
| 967 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); | 1097 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); |
| 968 | return; | 1098 | return; |
| 969 | } | 1099 | } |
| 970 | if (!tic.IsBuffer()) { | ||
| 971 | view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 972 | } | ||
| 973 | if (entry.is_written) { | 1100 | if (entry.is_written) { |
| 974 | view->MarkAsModified(texture_cache.Tick()); | 1101 | view->MarkAsModified(texture_cache.Tick()); |
| 975 | } | 1102 | } |
| 976 | glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE, | 1103 | const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); |
| 977 | view->GetFormat()); | 1104 | glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); |
| 978 | } | 1105 | } |
| 979 | 1106 | ||
| 980 | void RasterizerOpenGL::SyncViewport() { | 1107 | void RasterizerOpenGL::SyncViewport() { |
| @@ -983,6 +1110,26 @@ void RasterizerOpenGL::SyncViewport() { | |||
| 983 | const auto& regs = gpu.regs; | 1110 | const auto& regs = gpu.regs; |
| 984 | 1111 | ||
| 985 | const bool dirty_viewport = flags[Dirty::Viewports]; | 1112 | const bool dirty_viewport = flags[Dirty::Viewports]; |
| 1113 | const bool dirty_clip_control = flags[Dirty::ClipControl]; | ||
| 1114 | |||
| 1115 | if (dirty_clip_control || flags[Dirty::FrontFace]) { | ||
| 1116 | flags[Dirty::FrontFace] = false; | ||
| 1117 | |||
| 1118 | GLenum mode = MaxwellToGL::FrontFace(regs.front_face); | ||
| 1119 | if (regs.screen_y_control.triangle_rast_flip != 0 && | ||
| 1120 | regs.viewport_transform[0].scale_y < 0.0f) { | ||
| 1121 | switch (mode) { | ||
| 1122 | case GL_CW: | ||
| 1123 | mode = GL_CCW; | ||
| 1124 | break; | ||
| 1125 | case GL_CCW: | ||
| 1126 | mode = GL_CW; | ||
| 1127 | break; | ||
| 1128 | } | ||
| 1129 | } | ||
| 1130 | glFrontFace(mode); | ||
| 1131 | } | ||
| 1132 | |||
| 986 | if (dirty_viewport || flags[Dirty::ClipControl]) { | 1133 | if (dirty_viewport || flags[Dirty::ClipControl]) { |
| 987 | flags[Dirty::ClipControl] = false; | 1134 | flags[Dirty::ClipControl] = false; |
| 988 | 1135 | ||
| @@ -1080,11 +1227,6 @@ void RasterizerOpenGL::SyncCullMode() { | |||
| 1080 | glDisable(GL_CULL_FACE); | 1227 | glDisable(GL_CULL_FACE); |
| 1081 | } | 1228 | } |
| 1082 | } | 1229 | } |
| 1083 | |||
| 1084 | if (flags[Dirty::FrontFace]) { | ||
| 1085 | flags[Dirty::FrontFace] = false; | ||
| 1086 | glFrontFace(MaxwellToGL::FrontFace(regs.front_face)); | ||
| 1087 | } | ||
| 1088 | } | 1230 | } |
| 1089 | 1231 | ||
| 1090 | void RasterizerOpenGL::SyncPrimitiveRestart() { | 1232 | void RasterizerOpenGL::SyncPrimitiveRestart() { |
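Note: front-face syncing moves from SyncCullMode into SyncViewport because the effective winding now depends on viewport state. A minimal sketch of the rule the hunks above implement, using only the register names from the patch:

    // Flip the winding when the guest enables triangle rasterization flipping
    // and the first viewport transform mirrors the Y axis; otherwise use the
    // front-face register as-is.
    GLenum EffectiveFrontFace(const Tegra::Engines::Maxwell3D::Regs& regs) {
        GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
        if (regs.screen_y_control.triangle_rast_flip != 0 &&
            regs.viewport_transform[0].scale_y < 0.0f) {
            mode = (mode == GL_CW) ? GL_CCW : GL_CW;
        }
        return mode;
    }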
| @@ -1455,12 +1597,70 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { | |||
| 1455 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); | 1597 | oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); |
| 1456 | } | 1598 | } |
| 1457 | 1599 | ||
| 1600 | void RasterizerOpenGL::SyncTransformFeedback() { | ||
| 1601 | // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal | ||
| 1602 | // when this is required. | ||
| 1603 | const auto& regs = system.GPU().Maxwell3D().regs; | ||
| 1604 | |||
| 1605 | static constexpr std::size_t STRIDE = 3; | ||
| 1606 | std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; | ||
| 1607 | std::array<GLint, Maxwell::NumTransformFeedbackBuffers> streams; | ||
| 1608 | |||
| 1609 | GLint* cursor = attribs.data(); | ||
| 1610 | GLint* current_stream = streams.data(); | ||
| 1611 | |||
| 1612 | for (std::size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { | ||
| 1613 | const auto& layout = regs.tfb_layouts[feedback]; | ||
| 1614 | UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); | ||
| 1615 | if (layout.varying_count == 0) { | ||
| 1616 | continue; | ||
| 1617 | } | ||
| 1618 | |||
| 1619 | *current_stream = static_cast<GLint>(feedback); | ||
| 1620 | if (current_stream != streams.data()) { | ||
| 1621 | // When advancing to another stream, push the GL_NEXT_BUFFER_NV separator token | ||
| 1622 | cursor[0] = GL_NEXT_BUFFER_NV; | ||
| 1623 | cursor[1] = 0; | ||
| 1624 | cursor[2] = 0; | ||
| 1625 | cursor += STRIDE; | ||
| 1626 | } | ||
| 1627 | ++current_stream; | ||
| 1628 | |||
| 1629 | const auto& locations = regs.tfb_varying_locs[feedback]; | ||
| 1630 | std::optional<u8> current_index; | ||
| 1631 | for (u32 offset = 0; offset < layout.varying_count; ++offset) { | ||
| 1632 | const u8 location = locations[offset]; | ||
| 1633 | const u8 index = location / 4; | ||
| 1634 | |||
| 1635 | if (current_index == index) { | ||
| 1636 | // Increase the component count of the previous attribute | ||
| 1637 | ++cursor[-2]; | ||
| 1638 | continue; | ||
| 1639 | } | ||
| 1640 | current_index = index; | ||
| 1641 | |||
| 1642 | std::tie(cursor[0], cursor[2]) = TransformFeedbackEnum(location); | ||
| 1643 | cursor[1] = 1; | ||
| 1644 | cursor += STRIDE; | ||
| 1645 | } | ||
| 1646 | } | ||
| 1647 | |||
| 1648 | const GLsizei num_attribs = static_cast<GLsizei>((cursor - attribs.data()) / STRIDE); | ||
| 1649 | const GLsizei num_strides = static_cast<GLsizei>(current_stream - streams.data()); | ||
| 1650 | glTransformFeedbackStreamAttribsNV(num_attribs, attribs.data(), num_strides, streams.data(), | ||
| 1651 | GL_INTERLEAVED_ATTRIBS); | ||
| 1652 | } | ||
| 1653 | |||
| 1458 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | 1654 | void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { |
| 1459 | const auto& regs = system.GPU().Maxwell3D().regs; | 1655 | const auto& regs = system.GPU().Maxwell3D().regs; |
| 1460 | if (regs.tfb_enabled == 0) { | 1656 | if (regs.tfb_enabled == 0) { |
| 1461 | return; | 1657 | return; |
| 1462 | } | 1658 | } |
| 1463 | 1659 | ||
| 1660 | if (device.UseAssemblyShaders()) { | ||
| 1661 | SyncTransformFeedback(); | ||
| 1662 | } | ||
| 1663 | |||
| 1464 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 1664 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 1465 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 1665 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| 1466 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | 1666 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); |
| @@ -1487,6 +1687,10 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | |||
| 1487 | static_cast<GLsizeiptr>(size)); | 1687 | static_cast<GLsizeiptr>(size)); |
| 1488 | } | 1688 | } |
| 1489 | 1689 | ||
| 1690 | // We may have to call BeginTransformFeedbackNV here, since it and the core entry point | ||
| 1691 | // seem to resolve to different implementations in Nvidia's driver (the function pointers | ||
| 1692 | // differ). We use ARB_transform_feedback3 features with NV_transform_feedback interactions, | ||
| 1693 | // and the ARB extension doesn't define interactions for the non-NV call. In practice it works. | ||
| 1490 | glBeginTransformFeedback(GL_POINTS); | 1694 | glBeginTransformFeedback(GL_POINTS); |
| 1491 | } | 1695 | } |
| 1492 | 1696 | ||
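For readers unfamiliar with NV_transform_feedback3, SyncTransformFeedback above encodes each captured attribute as an {attrib, component count, index} triplet and separates consecutive buffers with a GL_NEXT_BUFFER_NV triplet. A hand-written sketch of the same layout, with hypothetical attributes not taken from the patch:

    // Capture the four position components into buffer 0 and one component of
    // generic attribute 3 into buffer 1.
    const GLint attribs[] = {
        GL_POSITION,          4, 0, // buffer 0: gl_Position.xyzw
        GL_NEXT_BUFFER_NV,    0, 0, // separator token: advance one buffer
        GL_GENERIC_ATTRIB_NV, 1, 3, // buffer 1: one component of attribute 3
    };
    const GLint streams[] = {0, 1}; // transform feedback buffer indices in use
    // 3 triplets, 2 buffers, interleaved mode, as in the function above.
    glTransformFeedbackStreamAttribsNV(3, attribs, 2, streams, GL_INTERLEAVED_ATTRIBS);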
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b94c65907..4f082592f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "video_core/engines/const_buffer_info.h" | 19 | #include "video_core/engines/const_buffer_info.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/rasterizer_accelerated.h" | 21 | #include "video_core/rasterizer_accelerated.h" |
| 22 | #include "video_core/rasterizer_cache.h" | ||
| 23 | #include "video_core/rasterizer_interface.h" | 22 | #include "video_core/rasterizer_interface.h" |
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 23 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 24 | #include "video_core/renderer_opengl/gl_device.h" |
| @@ -56,8 +55,8 @@ struct DrawParameters; | |||
| 56 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | 55 | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { |
| 57 | public: | 56 | public: |
| 58 | explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, | 57 | explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, |
| 59 | ScreenInfo& info, GLShader::ProgramManager& program_manager, | 58 | const Device& device, ScreenInfo& info, |
| 60 | StateTracker& state_tracker); | 59 | ProgramManager& program_manager, StateTracker& state_tracker); |
| 61 | ~RasterizerOpenGL() override; | 60 | ~RasterizerOpenGL() override; |
| 62 | 61 | ||
| 63 | void Draw(bool is_indexed, bool is_instanced) override; | 62 | void Draw(bool is_indexed, bool is_instanced) override; |
| @@ -100,40 +99,41 @@ private: | |||
| 100 | void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); | 99 | void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); |
| 101 | 100 | ||
| 102 | /// Configures the current constbuffers to use for the draw command. | 101 | /// Configures the current constbuffers to use for the draw command. |
| 103 | void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); | 102 | void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); |
| 104 | 103 | ||
| 105 | /// Configures the current constbuffers to use for the kernel invocation. | 104 | /// Configures the current constbuffers to use for the kernel invocation. |
| 106 | void SetupComputeConstBuffers(const Shader& kernel); | 105 | void SetupComputeConstBuffers(Shader* kernel); |
| 107 | 106 | ||
| 108 | /// Configures a constant buffer. | 107 | /// Configures a constant buffer. |
| 109 | void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 108 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 110 | const ConstBufferEntry& entry); | 109 | const ConstBufferEntry& entry, bool use_unified, |
| 110 | std::size_t unified_offset); | ||
| 111 | 111 | ||
| 112 | /// Configures the current global memory entries to use for the draw command. | 112 | /// Configures the current global memory entries to use for the draw command. |
| 113 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); | 113 | void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader); |
| 114 | 114 | ||
| 115 | /// Configures the current global memory entries to use for the kernel invocation. | 115 | /// Configures the current global memory entries to use for the kernel invocation. |
| 116 | void SetupComputeGlobalMemory(const Shader& kernel); | 116 | void SetupComputeGlobalMemory(Shader* kernel); |
| 117 | 117 | ||
| 118 | /// Configures a constant buffer. | 118 | /// Configures a constant buffer. |
| 119 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 119 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 120 | std::size_t size); | 120 | std::size_t size); |
| 121 | 121 | ||
| 122 | /// Configures the current textures to use for the draw command. | 122 | /// Configures the current textures to use for the draw command. |
| 123 | void SetupDrawTextures(std::size_t stage_index, const Shader& shader); | 123 | void SetupDrawTextures(std::size_t stage_index, Shader* shader); |
| 124 | 124 | ||
| 125 | /// Configures the textures used in a compute shader. | 125 | /// Configures the textures used in a compute shader. |
| 126 | void SetupComputeTextures(const Shader& kernel); | 126 | void SetupComputeTextures(Shader* kernel); |
| 127 | 127 | ||
| 128 | /// Configures a texture. | 128 | /// Configures a texture. |
| 129 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 129 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 130 | const SamplerEntry& entry); | 130 | const SamplerEntry& entry); |
| 131 | 131 | ||
| 132 | /// Configures images in a graphics shader. | 132 | /// Configures images in a graphics shader. |
| 133 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); | 133 | void SetupDrawImages(std::size_t stage_index, Shader* shader); |
| 134 | 134 | ||
| 135 | /// Configures images in a compute shader. | 135 | /// Configures images in a compute shader. |
| 136 | void SetupComputeImages(const Shader& shader); | 136 | void SetupComputeImages(Shader* shader); |
| 137 | 137 | ||
| 138 | /// Configures an image. | 138 | /// Configures an image. |
| 139 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | 139 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
| @@ -201,6 +201,10 @@ private: | |||
| 201 | /// Syncs the framebuffer sRGB state to match the guest state | 201 | /// Syncs the framebuffer sRGB state to match the guest state |
| 202 | void SyncFramebufferSRGB(); | 202 | void SyncFramebufferSRGB(); |
| 203 | 203 | ||
| 204 | /// Syncs transform feedback state to match guest state | ||
| 205 | /// @note Only valid on assembly shaders | ||
| 206 | void SyncTransformFeedback(); | ||
| 207 | |||
| 204 | /// Begin a transform feedback | 208 | /// Begin a transform feedback |
| 205 | void BeginTransformFeedback(GLenum primitive_mode); | 209 | void BeginTransformFeedback(GLenum primitive_mode); |
| 206 | 210 | ||
| @@ -224,7 +228,7 @@ private: | |||
| 224 | 228 | ||
| 225 | void SetupShaders(GLenum primitive_mode); | 229 | void SetupShaders(GLenum primitive_mode); |
| 226 | 230 | ||
| 227 | const Device device; | 231 | const Device& device; |
| 228 | 232 | ||
| 229 | TextureCacheOpenGL texture_cache; | 233 | TextureCacheOpenGL texture_cache; |
| 230 | ShaderCacheOpenGL shader_cache; | 234 | ShaderCacheOpenGL shader_cache; |
| @@ -236,7 +240,7 @@ private: | |||
| 236 | 240 | ||
| 237 | Core::System& system; | 241 | Core::System& system; |
| 238 | ScreenInfo& screen_info; | 242 | ScreenInfo& screen_info; |
| 239 | GLShader::ProgramManager& program_manager; | 243 | ProgramManager& program_manager; |
| 240 | StateTracker& state_tracker; | 244 | StateTracker& state_tracker; |
| 241 | 245 | ||
| 242 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 246 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| @@ -248,6 +252,13 @@ private: | |||
| 248 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | 252 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> |
| 249 | enabled_transform_feedback_buffers; | 253 | enabled_transform_feedback_buffers; |
| 250 | 254 | ||
| 255 | static constexpr std::size_t NUM_CONSTANT_BUFFERS = | ||
| 256 | Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||
| 257 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||
| 258 | std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; | ||
| 259 | std::size_t current_cbuf = 0; | ||
| 260 | OGLBuffer unified_uniform_buffer; | ||
| 261 | |||
| 251 | /// Number of commands queued to the OpenGL driver. Reset on flush. | 262 | /// Number of commands queued to the OpenGL driver. Reset on flush. |
| 252 | std::size_t num_queued_commands = 0; | 263 | std::size_t num_queued_commands = 0; |
| 253 | 264 | ||
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 97803d480..a787e27d2 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -125,6 +125,15 @@ void OGLProgram::Release() { | |||
| 125 | handle = 0; | 125 | handle = 0; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | void OGLAssemblyProgram::Release() { | ||
| 129 | if (handle == 0) { | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); | ||
| 133 | glDeleteProgramsARB(1, &handle); | ||
| 134 | handle = 0; | ||
| 135 | } | ||
| 136 | |||
| 128 | void OGLPipeline::Create() { | 137 | void OGLPipeline::Create() { |
| 129 | if (handle != 0) | 138 | if (handle != 0) |
| 130 | return; | 139 | return; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index de93f4212..f8b322227 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -167,6 +167,22 @@ public: | |||
| 167 | GLuint handle = 0; | 167 | GLuint handle = 0; |
| 168 | }; | 168 | }; |
| 169 | 169 | ||
| 170 | class OGLAssemblyProgram : private NonCopyable { | ||
| 171 | public: | ||
| 172 | OGLAssemblyProgram() = default; | ||
| 173 | |||
| 174 | OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {} | ||
| 175 | |||
| 176 | ~OGLAssemblyProgram() { | ||
| 177 | Release(); | ||
| 178 | } | ||
| 179 | |||
| 180 | /// Deletes the internal OpenGL resource | ||
| 181 | void Release(); | ||
| 182 | |||
| 183 | GLuint handle = 0; | ||
| 184 | }; | ||
| 185 | |||
| 170 | class OGLPipeline : private NonCopyable { | 186 | class OGLPipeline : private NonCopyable { |
| 171 | public: | 187 | public: |
| 172 | OGLPipeline() = default; | 188 | OGLPipeline() = default; |
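Design note: OGLAssemblyProgram mirrors the move-only RAII pattern of the other OGL* wrappers, but its Release() goes through glDeleteProgramsARB because assembly program names are allocated with glGenProgramsARB rather than glCreateProgram. A usage sketch (hypothetical call site, assuming a current GL context):

    OGLAssemblyProgram program;
    glGenProgramsARB(1, &program.handle); // the wrapper owns the name from here
    // ... upload the program string and bind it for drawing ...
    // Leaving scope runs ~OGLAssemblyProgram(), which calls Release() and
    // issues glDeleteProgramsARB(1, &handle) exactly once.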
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 9759a7078..46e780a06 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/engines/shader_type.h" | 21 | #include "video_core/engines/shader_type.h" |
| 22 | #include "video_core/memory_manager.h" | 22 | #include "video_core/memory_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_arb_decompiler.h" | ||
| 23 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 24 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 25 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 26 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| @@ -29,6 +30,7 @@ | |||
| 29 | #include "video_core/shader/memory_util.h" | 30 | #include "video_core/shader/memory_util.h" |
| 30 | #include "video_core/shader/registry.h" | 31 | #include "video_core/shader/registry.h" |
| 31 | #include "video_core/shader/shader_ir.h" | 32 | #include "video_core/shader/shader_ir.h" |
| 33 | #include "video_core/shader_cache.h" | ||
| 32 | 34 | ||
| 33 | namespace OpenGL { | 35 | namespace OpenGL { |
| 34 | 36 | ||
| @@ -97,6 +99,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { | |||
| 97 | return {}; | 99 | return {}; |
| 98 | } | 100 | } |
| 99 | 101 | ||
| 102 | constexpr GLenum AssemblyEnum(ShaderType shader_type) { | ||
| 103 | switch (shader_type) { | ||
| 104 | case ShaderType::Vertex: | ||
| 105 | return GL_VERTEX_PROGRAM_NV; | ||
| 106 | case ShaderType::TesselationControl: | ||
| 107 | return GL_TESS_CONTROL_PROGRAM_NV; | ||
| 108 | case ShaderType::TesselationEval: | ||
| 109 | return GL_TESS_EVALUATION_PROGRAM_NV; | ||
| 110 | case ShaderType::Geometry: | ||
| 111 | return GL_GEOMETRY_PROGRAM_NV; | ||
| 112 | case ShaderType::Fragment: | ||
| 113 | return GL_FRAGMENT_PROGRAM_NV; | ||
| 114 | case ShaderType::Compute: | ||
| 115 | return GL_COMPUTE_PROGRAM_NV; | ||
| 116 | } | ||
| 117 | return {}; | ||
| 118 | } | ||
| 119 | |||
| 100 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { | 120 | std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { |
| 101 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); | 121 | return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); |
| 102 | } | 122 | } |
| @@ -120,18 +140,44 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | |||
| 120 | return registry; | 140 | return registry; |
| 121 | } | 141 | } |
| 122 | 142 | ||
| 123 | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, | 143 | ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier, |
| 124 | u64 unique_identifier, const ShaderIR& ir, | 144 | const ShaderIR& ir, const Registry& registry, |
| 125 | const Registry& registry, bool hint_retrievable = false) { | 145 | bool hint_retrievable = false) { |
| 126 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); | 146 | const std::string shader_id = MakeShaderID(unique_identifier, shader_type); |
| 127 | LOG_INFO(Render_OpenGL, "{}", shader_id); | 147 | LOG_INFO(Render_OpenGL, "{}", shader_id); |
| 128 | 148 | ||
| 129 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | 149 | auto program = std::make_shared<ProgramHandle>(); |
| 130 | OGLShader shader; | 150 | |
| 131 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | 151 | if (device.UseAssemblyShaders()) { |
| 152 | const std::string arb = | ||
| 153 | DecompileAssemblyShader(device, ir, registry, shader_type, shader_id); | ||
| 154 | |||
| 155 | GLuint& arb_prog = program->assembly_program.handle; | ||
| 156 | |||
| 157 | // The commented-out calls raise OpenGL errors but are compatible with apitrace. | ||
| 158 | // Use them only when capturing a trace for replay in apitrace. | ||
| 159 | #if 0 | ||
| 160 | glGenProgramsNV(1, &arb_prog); | ||
| 161 | glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()), | ||
| 162 | reinterpret_cast<const GLubyte*>(arb.data())); | ||
| 163 | #else | ||
| 164 | glGenProgramsARB(1, &arb_prog); | ||
| 165 | glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB, | ||
| 166 | static_cast<GLsizei>(arb.size()), arb.data()); | ||
| 167 | #endif | ||
| 168 | const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV)); | ||
| 169 | if (err && *err) { | ||
| 170 | LOG_CRITICAL(Render_OpenGL, "{}", err); | ||
| 171 | LOG_INFO(Render_OpenGL, "\n{}", arb); | ||
| 172 | } | ||
| 173 | } else { | ||
| 174 | const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); | ||
| 175 | OGLShader shader; | ||
| 176 | shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | ||
| 177 | |||
| 178 | program->source_program.Create(true, hint_retrievable, shader.handle); | ||
| 179 | } | ||
| 132 | 180 | ||
| 133 | auto program = std::make_shared<OGLProgram>(); | ||
| 134 | program->Create(true, hint_retrievable, shader.handle); | ||
| 135 | return program; | 181 | return program; |
| 136 | } | 182 | } |
| 137 | 183 | ||
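A sketch of the error-handling path in BuildShader above: the assembly extensions report failures through an error string (and a byte offset into the source) instead of a compile-status query. The GL_PROGRAM_ERROR_POSITION_ARB query is not used by the patch and is shown here as an assumed part of the extension:

    glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
                            static_cast<GLsizei>(arb.size()), arb.data());
    GLint error_pos = -1;
    glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);
    const auto* err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
    if (err && *err) {
        // A non-empty string means the program failed to assemble.
        LOG_CRITICAL(Render_OpenGL, "Assembly error at offset {}: {}", error_pos, err);
    }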
| @@ -151,22 +197,26 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 151 | 197 | ||
| 152 | } // Anonymous namespace | 198 | } // Anonymous namespace |
| 153 | 199 | ||
| 154 | CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | 200 | Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, |
| 155 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 201 | ProgramSharedPtr program_) |
| 156 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) | 202 | : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { |
| 157 | : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, | 203 | handle = program->assembly_program.handle; |
| 158 | size_in_bytes{size_in_bytes}, program{std::move(program)} {} | 204 | if (handle == 0) { |
| 205 | handle = program->source_program.handle; | ||
| 206 | } | ||
| 207 | ASSERT(handle != 0); | ||
| 208 | } | ||
| 159 | 209 | ||
| 160 | CachedShader::~CachedShader() = default; | 210 | Shader::~Shader() = default; |
| 161 | 211 | ||
| 162 | GLuint CachedShader::GetHandle() const { | 212 | GLuint Shader::GetHandle() const { |
| 163 | DEBUG_ASSERT(registry->IsConsistent()); | 213 | DEBUG_ASSERT(registry->IsConsistent()); |
| 164 | return program->handle; | 214 | return handle; |
| 165 | } | 215 | } |
| 166 | 216 | ||
| 167 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 217 | std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, |
| 168 | Maxwell::ShaderProgram program_type, ProgramCode code, | 218 | Maxwell::ShaderProgram program_type, |
| 169 | ProgramCode code_b) { | 219 | ProgramCode code, ProgramCode code_b) { |
| 170 | const auto shader_type = GetShaderType(program_type); | 220 | const auto shader_type = GetShaderType(program_type); |
| 171 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 221 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 172 | 222 | ||
| @@ -191,11 +241,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 191 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 241 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 192 | params.disk_cache.SaveEntry(std::move(entry)); | 242 | params.disk_cache.SaveEntry(std::move(entry)); |
| 193 | 243 | ||
| 194 | return std::shared_ptr<CachedShader>(new CachedShader( | 244 | return std::unique_ptr<Shader>(new Shader( |
| 195 | params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); | 245 | std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); |
| 196 | } | 246 | } |
| 197 | 247 | ||
| 198 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 248 | std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, |
| 249 | ProgramCode code) { | ||
| 199 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 250 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 200 | 251 | ||
| 201 | auto& engine = params.system.GPU().KeplerCompute(); | 252 | auto& engine = params.system.GPU().KeplerCompute(); |
| @@ -215,22 +266,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog | |||
| 215 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 266 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 216 | params.disk_cache.SaveEntry(std::move(entry)); | 267 | params.disk_cache.SaveEntry(std::move(entry)); |
| 217 | 268 | ||
| 218 | return std::shared_ptr<CachedShader>(new CachedShader( | 269 | return std::unique_ptr<Shader>(new Shader(std::move(registry), |
| 219 | params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); | 270 | MakeEntries(params.device, ir, ShaderType::Compute), |
| 271 | std::move(program))); | ||
| 220 | } | 272 | } |
| 221 | 273 | ||
| 222 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 274 | std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, |
| 223 | const PrecompiledShader& precompiled_shader, | 275 | const PrecompiledShader& precompiled_shader) { |
| 224 | std::size_t size_in_bytes) { | 276 | return std::unique_ptr<Shader>(new Shader( |
| 225 | return std::shared_ptr<CachedShader>( | 277 | precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); |
| 226 | new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, | ||
| 227 | precompiled_shader.entries, precompiled_shader.program)); | ||
| 228 | } | 278 | } |
| 229 | 279 | ||
| 230 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 280 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| 231 | Core::Frontend::EmuWindow& emu_window, const Device& device) | 281 | Core::Frontend::EmuWindow& emu_window, const Device& device) |
| 232 | : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, | 282 | : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, |
| 233 | disk_cache{system} {} | 283 | emu_window{emu_window}, device{device}, disk_cache{system} {} |
| 284 | |||
| 285 | ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; | ||
| 234 | 286 | ||
| 235 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | 287 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, |
| 236 | const VideoCore::DiskResourceLoadCallback& callback) { | 288 | const VideoCore::DiskResourceLoadCallback& callback) { |
| @@ -239,7 +291,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 239 | return; | 291 | return; |
| 240 | } | 292 | } |
| 241 | 293 | ||
| 242 | const std::vector gl_cache = disk_cache.LoadPrecompiled(); | 294 | std::vector<ShaderDiskCachePrecompiled> gl_cache; |
| 295 | if (!device.UseAssemblyShaders()) { | ||
| 296 | // Only load precompiled cache when we are not using assembly shaders | ||
| 297 | gl_cache = disk_cache.LoadPrecompiled(); | ||
| 298 | } | ||
| 243 | const auto supported_formats = GetSupportedFormats(); | 299 | const auto supported_formats = GetSupportedFormats(); |
| 244 | 300 | ||
| 245 | // Track if precompiled cache was altered during loading to know if we have to | 301 | // Track if precompiled cache was altered during loading to know if we have to |
| @@ -278,7 +334,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 278 | auto registry = MakeRegistry(entry); | 334 | auto registry = MakeRegistry(entry); |
| 279 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); | 335 | const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); |
| 280 | 336 | ||
| 281 | std::shared_ptr<OGLProgram> program; | 337 | ProgramSharedPtr program; |
| 282 | if (precompiled_entry) { | 338 | if (precompiled_entry) { |
| 283 | // If the shader is precompiled, attempt to load it with the precompiled binary | 339 | // If the shader is precompiled, attempt to load it with the precompiled binary |
| 284 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); | 340 | program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); |
| @@ -294,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 294 | PrecompiledShader shader; | 350 | PrecompiledShader shader; |
| 295 | shader.program = std::move(program); | 351 | shader.program = std::move(program); |
| 296 | shader.registry = std::move(registry); | 352 | shader.registry = std::move(registry); |
| 297 | shader.entries = MakeEntries(ir); | 353 | shader.entries = MakeEntries(device, ir, entry.type); |
| 298 | 354 | ||
| 299 | std::scoped_lock lock{mutex}; | 355 | std::scoped_lock lock{mutex}; |
| 300 | if (callback) { | 356 | if (callback) { |
| @@ -332,6 +388,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 332 | return; | 388 | return; |
| 333 | } | 389 | } |
| 334 | 390 | ||
| 391 | if (device.UseAssemblyShaders()) { | ||
| 392 | // Don't store precompiled binaries for assembly shaders. | ||
| 393 | return; | ||
| 394 | } | ||
| 395 | |||
| 335 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | 396 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 336 | // before precompiling them | 397 | // before precompiling them |
| 337 | 398 | ||
| @@ -339,7 +400,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 339 | const u64 id = (*transferable)[i].unique_identifier; | 400 | const u64 id = (*transferable)[i].unique_identifier; |
| 340 | const auto it = find_precompiled(id); | 401 | const auto it = find_precompiled(id); |
| 341 | if (it == gl_cache.end()) { | 402 | if (it == gl_cache.end()) { |
| 342 | const GLuint program = runtime_cache.at(id).program->handle; | 403 | const GLuint program = runtime_cache.at(id).program->source_program.handle; |
| 343 | disk_cache.SavePrecompiled(id, program); | 404 | disk_cache.SavePrecompiled(id, program); |
| 344 | precompiled_cache_altered = true; | 405 | precompiled_cache_altered = true; |
| 345 | } | 406 | } |
| @@ -350,7 +411,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 350 | } | 411 | } |
| 351 | } | 412 | } |
| 352 | 413 | ||
| 353 | std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( | 414 | ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( |
| 354 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | 415 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 355 | const std::unordered_set<GLenum>& supported_formats) { | 416 | const std::unordered_set<GLenum>& supported_formats) { |
| 356 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { | 417 | if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { |
| @@ -358,15 +419,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||
| 358 | return {}; | 419 | return {}; |
| 359 | } | 420 | } |
| 360 | 421 | ||
| 361 | auto program = std::make_shared<OGLProgram>(); | 422 | auto program = std::make_shared<ProgramHandle>(); |
| 362 | program->handle = glCreateProgram(); | 423 | GLuint& handle = program->source_program.handle; |
| 363 | glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); | 424 | handle = glCreateProgram(); |
| 364 | glProgramBinary(program->handle, precompiled_entry.binary_format, | 425 | glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE); |
| 365 | precompiled_entry.binary.data(), | 426 | glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(), |
| 366 | static_cast<GLsizei>(precompiled_entry.binary.size())); | 427 | static_cast<GLsizei>(precompiled_entry.binary.size())); |
| 367 | 428 | ||
| 368 | GLint link_status; | 429 | GLint link_status; |
| 369 | glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); | 430 | glGetProgramiv(handle, GL_LINK_STATUS, &link_status); |
| 370 | if (link_status == GL_FALSE) { | 431 | if (link_status == GL_FALSE) { |
| 371 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); | 432 | LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); |
| 372 | return {}; | 433 | return {}; |
| @@ -375,7 +436,7 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||
| 375 | return program; | 436 | return program; |
| 376 | } | 437 | } |
| 377 | 438 | ||
| 378 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 439 | Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 379 | if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { | 440 | if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { |
| 380 | return last_shaders[static_cast<std::size_t>(program)]; | 441 | return last_shaders[static_cast<std::size_t>(program)]; |
| 381 | } | 442 | } |
| @@ -385,8 +446,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 385 | 446 | ||
| 386 | // Look up shader in the cache based on address | 447 | // Look up shader in the cache based on address |
| 387 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; | 448 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; |
| 388 | Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; | 449 | if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { |
| 389 | if (shader) { | ||
| 390 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 450 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 391 | } | 451 | } |
| 392 | 452 | ||
| @@ -407,30 +467,29 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 407 | const ShaderParameters params{system, disk_cache, device, | 467 | const ShaderParameters params{system, disk_cache, device, |
| 408 | *cpu_addr, host_ptr, unique_identifier}; | 468 | *cpu_addr, host_ptr, unique_identifier}; |
| 409 | 469 | ||
| 470 | std::unique_ptr<Shader> shader; | ||
| 410 | const auto found = runtime_cache.find(unique_identifier); | 471 | const auto found = runtime_cache.find(unique_identifier); |
| 411 | if (found == runtime_cache.end()) { | 472 | if (found == runtime_cache.end()) { |
| 412 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), | 473 | shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); |
| 413 | std::move(code_b)); | ||
| 414 | } else { | 474 | } else { |
| 415 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 475 | shader = Shader::CreateFromCache(params, found->second); |
| 416 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 417 | } | 476 | } |
| 418 | 477 | ||
| 478 | Shader* const result = shader.get(); | ||
| 419 | if (cpu_addr) { | 479 | if (cpu_addr) { |
| 420 | Register(shader); | 480 | Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64)); |
| 421 | } else { | 481 | } else { |
| 422 | null_shader = shader; | 482 | null_shader = std::move(shader); |
| 423 | } | 483 | } |
| 424 | 484 | ||
| 425 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 485 | return last_shaders[static_cast<std::size_t>(program)] = result; |
| 426 | } | 486 | } |
| 427 | 487 | ||
| 428 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | 488 | Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { |
| 429 | auto& memory_manager{system.GPU().MemoryManager()}; | 489 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 430 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; | 490 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; |
| 431 | 491 | ||
| 432 | auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; | 492 | if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { |
| 433 | if (kernel) { | ||
| 434 | return kernel; | 493 | return kernel; |
| 435 | } | 494 | } |
| 436 | 495 | ||
| @@ -442,20 +501,21 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 442 | const ShaderParameters params{system, disk_cache, device, | 501 | const ShaderParameters params{system, disk_cache, device, |
| 443 | *cpu_addr, host_ptr, unique_identifier}; | 502 | *cpu_addr, host_ptr, unique_identifier}; |
| 444 | 503 | ||
| 504 | std::unique_ptr<Shader> kernel; | ||
| 445 | const auto found = runtime_cache.find(unique_identifier); | 505 | const auto found = runtime_cache.find(unique_identifier); |
| 446 | if (found == runtime_cache.end()) { | 506 | if (found == runtime_cache.end()) { |
| 447 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | 507 | kernel = Shader::CreateKernelFromMemory(params, std::move(code)); |
| 448 | } else { | 508 | } else { |
| 449 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 509 | kernel = Shader::CreateFromCache(params, found->second); |
| 450 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 451 | } | 510 | } |
| 452 | 511 | ||
| 512 | Shader* const result = kernel.get(); | ||
| 453 | if (cpu_addr) { | 513 | if (cpu_addr) { |
| 454 | Register(kernel); | 514 | Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64)); |
| 455 | } else { | 515 | } else { |
| 456 | null_kernel = kernel; | 516 | null_kernel = std::move(kernel); |
| 457 | } | 517 | } |
| 458 | return kernel; | 518 | return result; |
| 459 | } | 519 | } |
| 460 | 520 | ||
| 461 | } // namespace OpenGL | 521 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 91690b470..6848f1388 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -18,12 +18,12 @@ | |||
| 18 | 18 | ||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "video_core/engines/shader_type.h" | 20 | #include "video_core/engines/shader_type.h" |
| 21 | #include "video_core/rasterizer_cache.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 23 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 25 | #include "video_core/shader/registry.h" | 24 | #include "video_core/shader/registry.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 25 | #include "video_core/shader/shader_ir.h" |
| 26 | #include "video_core/shader_cache.h" | ||
| 27 | 27 | ||
| 28 | namespace Core { | 28 | namespace Core { |
| 29 | class System; | 29 | class System; |
| @@ -35,16 +35,20 @@ class EmuWindow; | |||
| 35 | 35 | ||
| 36 | namespace OpenGL { | 36 | namespace OpenGL { |
| 37 | 37 | ||
| 38 | class CachedShader; | ||
| 39 | class Device; | 38 | class Device; |
| 40 | class RasterizerOpenGL; | 39 | class RasterizerOpenGL; |
| 41 | struct UnspecializedShader; | 40 | struct UnspecializedShader; |
| 42 | 41 | ||
| 43 | using Shader = std::shared_ptr<CachedShader>; | ||
| 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 42 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 45 | 43 | ||
| 44 | struct ProgramHandle { | ||
| 45 | OGLProgram source_program; | ||
| 46 | OGLAssemblyProgram assembly_program; | ||
| 47 | }; | ||
| 48 | using ProgramSharedPtr = std::shared_ptr<ProgramHandle>; | ||
| 49 | |||
| 46 | struct PrecompiledShader { | 50 | struct PrecompiledShader { |
| 47 | std::shared_ptr<OGLProgram> program; | 51 | ProgramSharedPtr program; |
| 48 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 52 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 49 | ShaderEntries entries; | 53 | ShaderEntries entries; |
| 50 | }; | 54 | }; |
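ProgramHandle deliberately carries both program flavors; only one of them ever owns a non-zero GL name, and the Shader constructor in gl_shader_cache.cpp prefers the assembly handle. A one-function sketch of that selection (hypothetical helper, not part of the patch):

    GLuint PickHandle(const ProgramHandle& program) {
        // Exactly one member holds a non-zero name, depending on whether the
        // device uses assembly shaders or GLSL programs.
        const GLuint assembly = program.assembly_program.handle;
        return assembly != 0 ? assembly : program.source_program.handle;
    }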
| @@ -58,64 +62,56 @@ struct ShaderParameters { | |||
| 58 | u64 unique_identifier; | 62 | u64 unique_identifier; |
| 59 | }; | 63 | }; |
| 60 | 64 | ||
| 61 | class CachedShader final : public RasterizerCacheObject { | 65 | class Shader final { |
| 62 | public: | 66 | public: |
| 63 | ~CachedShader(); | 67 | ~Shader(); |
| 64 | 68 | ||
| 65 | /// Gets the GL program handle for the shader | 69 | /// Gets the GL program handle for the shader |
| 66 | GLuint GetHandle() const; | 70 | GLuint GetHandle() const; |
| 67 | 71 | ||
| 68 | /// Returns the size in bytes of the shader | ||
| 69 | std::size_t GetSizeInBytes() const override { | ||
| 70 | return size_in_bytes; | ||
| 71 | } | ||
| 72 | |||
| 73 | /// Gets the shader entries for the shader | 72 | /// Gets the shader entries for the shader |
| 74 | const ShaderEntries& GetEntries() const { | 73 | const ShaderEntries& GetEntries() const { |
| 75 | return entries; | 74 | return entries; |
| 76 | } | 75 | } |
| 77 | 76 | ||
| 78 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 77 | static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, |
| 79 | Maxwell::ShaderProgram program_type, | 78 | Maxwell::ShaderProgram program_type, |
| 80 | ProgramCode program_code, ProgramCode program_code_b); | 79 | ProgramCode program_code, |
| 81 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | 80 | ProgramCode program_code_b); |
| 81 | static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, | ||
| 82 | ProgramCode code); | ||
| 82 | 83 | ||
| 83 | static Shader CreateFromCache(const ShaderParameters& params, | 84 | static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, |
| 84 | const PrecompiledShader& precompiled_shader, | 85 | const PrecompiledShader& precompiled_shader); |
| 85 | std::size_t size_in_bytes); | ||
| 86 | 86 | ||
| 87 | private: | 87 | private: |
| 88 | explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | 88 | explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, |
| 89 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 89 | ProgramSharedPtr program); |
| 90 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | ||
| 91 | 90 | ||
| 92 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 91 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 93 | ShaderEntries entries; | 92 | ShaderEntries entries; |
| 94 | std::size_t size_in_bytes = 0; | 93 | ProgramSharedPtr program; |
| 95 | std::shared_ptr<OGLProgram> program; | 94 | GLuint handle = 0; |
| 96 | }; | 95 | }; |
| 97 | 96 | ||
| 98 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 97 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { |
| 99 | public: | 98 | public: |
| 100 | explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 99 | explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| 101 | Core::Frontend::EmuWindow& emu_window, const Device& device); | 100 | Core::Frontend::EmuWindow& emu_window, const Device& device); |
| 101 | ~ShaderCacheOpenGL() override; | ||
| 102 | 102 | ||
| 103 | /// Loads disk cache for the current game | 103 | /// Loads disk cache for the current game |
| 104 | void LoadDiskCache(const std::atomic_bool& stop_loading, | 104 | void LoadDiskCache(const std::atomic_bool& stop_loading, |
| 105 | const VideoCore::DiskResourceLoadCallback& callback); | 105 | const VideoCore::DiskResourceLoadCallback& callback); |
| 106 | 106 | ||
| 107 | /// Gets the current specified shader stage program | 107 | /// Gets the current specified shader stage program |
| 108 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 108 | Shader* GetStageProgram(Maxwell::ShaderProgram program); |
| 109 | 109 | ||
| 110 | /// Gets a compute kernel in the passed address | 110 | /// Gets a compute kernel in the passed address |
| 111 | Shader GetComputeKernel(GPUVAddr code_addr); | 111 | Shader* GetComputeKernel(GPUVAddr code_addr); |
| 112 | |||
| 113 | protected: | ||
| 114 | // We do not have to flush this cache as things in it are never modified by us. | ||
| 115 | void FlushObjectInner(const Shader& object) override {} | ||
| 116 | 112 | ||
| 117 | private: | 113 | private: |
| 118 | std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( | 114 | ProgramSharedPtr GeneratePrecompiledProgram( |
| 119 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, | 115 | const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, |
| 120 | const std::unordered_set<GLenum>& supported_formats); | 116 | const std::unordered_set<GLenum>& supported_formats); |
| 121 | 117 | ||
| @@ -125,10 +121,10 @@ private: | |||
| 125 | ShaderDiskCacheOpenGL disk_cache; | 121 | ShaderDiskCacheOpenGL disk_cache; |
| 126 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | 122 | std::unordered_map<u64, PrecompiledShader> runtime_cache; |
| 127 | 123 | ||
| 128 | Shader null_shader{}; | 124 | std::unique_ptr<Shader> null_shader; |
| 129 | Shader null_kernel{}; | 125 | std::unique_ptr<Shader> null_kernel; |
| 130 | 126 | ||
| 131 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 127 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; |
| 132 | }; | 128 | }; |
| 133 | 129 | ||
| 134 | } // namespace OpenGL | 130 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 960ebf1a1..d6e30b321 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -61,8 +61,8 @@ struct TextureDerivates {}; | |||
| 61 | using TextureArgument = std::pair<Type, Node>; | 61 | using TextureArgument = std::pair<Type, Node>; |
| 62 | using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; | 62 | using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; |
| 63 | 63 | ||
| 64 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); |
| 65 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); | 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); |
| 66 | 66 | ||
| 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt |
| 68 | #define ftou floatBitsToUint | 68 | #define ftou floatBitsToUint |
| @@ -402,6 +402,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 402 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 402 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 403 | } | 403 | } |
| 404 | 404 | ||
| 405 | bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) { | ||
| 406 | const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size()); | ||
| 407 | // One uniform buffer binding is reserved for internal emulation use | ||
| 408 | const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1; | ||
| 409 | return num_ubos > num_available_ubos; | ||
| 410 | } | ||
| 411 | |||
| 405 | struct GenericVaryingDescription { | 412 | struct GenericVaryingDescription { |
| 406 | std::string name; | 413 | std::string name; |
| 407 | u8 first_element = 0; | 414 | u8 first_element = 0; |
| @@ -412,8 +419,9 @@ class GLSLDecompiler final { | |||
| 412 | public: | 419 | public: |
| 413 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | 420 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 414 | ShaderType stage, std::string_view identifier, std::string_view suffix) | 421 | ShaderType stage, std::string_view identifier, std::string_view suffix) |
| 415 | : device{device}, ir{ir}, registry{registry}, stage{stage}, | 422 | : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier}, |
| 416 | identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { | 423 | suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{ |
| 424 | UseUnifiedUniforms(device, ir, stage)} { | ||
| 417 | if (stage != ShaderType::Compute) { | 425 | if (stage != ShaderType::Compute) { |
| 418 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | 426 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); |
| 419 | } | 427 | } |
| @@ -618,7 +626,9 @@ private: | |||
| 618 | break; | 626 | break; |
| 619 | } | 627 | } |
| 620 | } | 628 | } |
| 621 | if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) { | 629 | |
| 630 | if (stage != ShaderType::Geometry && | ||
| 631 | (stage != ShaderType::Vertex || device.HasVertexViewportLayer())) { | ||
| 622 | if (ir.UsesLayer()) { | 632 | if (ir.UsesLayer()) { |
| 623 | code.AddLine("int gl_Layer;"); | 633 | code.AddLine("int gl_Layer;"); |
| 624 | } | 634 | } |
| @@ -647,6 +657,16 @@ private: | |||
| 647 | --code.scope; | 657 | --code.scope; |
| 648 | code.AddLine("}};"); | 658 | code.AddLine("}};"); |
| 649 | code.AddNewLine(); | 659 | code.AddNewLine(); |
| 660 | |||
| 661 | if (stage == ShaderType::Geometry) { | ||
| 662 | if (ir.UsesLayer()) { | ||
| 663 | code.AddLine("out int gl_Layer;"); | ||
| 664 | } | ||
| 665 | if (ir.UsesViewportIndex()) { | ||
| 666 | code.AddLine("out int gl_ViewportIndex;"); | ||
| 667 | } | ||
| 668 | } | ||
| 669 | code.AddNewLine(); | ||
| 650 | } | 670 | } |
| 651 | 671 | ||
| 652 | void DeclareRegisters() { | 672 | void DeclareRegisters() { |
| @@ -834,12 +854,24 @@ private: | |||
| 834 | } | 854 | } |
| 835 | 855 | ||
| 836 | void DeclareConstantBuffers() { | 856 | void DeclareConstantBuffers() { |
| 857 | if (use_unified_uniforms) { | ||
| 858 | const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer + | ||
| 859 | static_cast<u32>(ir.GetGlobalMemory().size()); | ||
| 860 | code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{", | ||
| 861 | binding); | ||
| 862 | code.AddLine(" uint cbufs[];"); | ||
| 863 | code.AddLine("}};"); | ||
| 864 | code.AddNewLine(); | ||
| 865 | return; | ||
| 866 | } | ||
| 867 | |||
| 837 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; | 868 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; |
| 838 | for (const auto& buffers : ir.GetConstantBuffers()) { | 869 | for (const auto [index, info] : ir.GetConstantBuffers()) { |
| 839 | const auto index = buffers.first; | 870 | const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4; |
| 871 | const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; | ||
| 840 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, | 872 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, |
| 841 | GetConstBufferBlock(index)); | 873 | GetConstBufferBlock(index)); |
| 842 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); | 874 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); |
| 843 | code.AddLine("}};"); | 875 | code.AddLine("}};"); |
| 844 | code.AddNewLine(); | 876 | code.AddNewLine(); |
| 845 | } | 877 | } |
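A worked example of the UseUnifiedUniforms decision that selects between these two declaration paths, with hypothetical numbers:

    // Suppose the device exposes 18 uniform-buffer bindings for this stage.
    // One binding is reserved for emulation, leaving 17 for guest buffers.
    const u32 num_ubos = 18;               // const buffers used by the shader
    const u32 num_available_ubos = 18 - 1; // device limit minus the reserved UBO
    const bool use_unified = num_ubos > num_available_ubos; // true: std430 path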
| @@ -1038,42 +1070,51 @@ private: | |||
| 1038 | 1070 | ||
| 1039 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | 1071 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { |
| 1040 | const Node offset = cbuf->GetOffset(); | 1072 | const Node offset = cbuf->GetOffset(); |
| 1073 | const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS; | ||
| 1074 | |||
| 1041 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | 1075 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { |
| 1042 | // Direct access | 1076 | // Direct access |
| 1043 | const u32 offset_imm = immediate->GetValue(); | 1077 | const u32 offset_imm = immediate->GetValue(); |
| 1044 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | 1078 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); |
| 1045 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | 1079 | if (use_unified_uniforms) { |
| 1046 | offset_imm / (4 * 4), (offset_imm / 4) % 4), | 1080 | return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), |
| 1047 | Type::Uint}; | 1081 | Type::Uint}; |
| 1082 | } else { | ||
| 1083 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | ||
| 1084 | offset_imm / (4 * 4), (offset_imm / 4) % 4), | ||
| 1085 | Type::Uint}; | ||
| 1086 | } | ||
| 1048 | } | 1087 | } |
| 1049 | 1088 | ||
| 1050 | if (std::holds_alternative<OperationNode>(*offset)) { | 1089 | // Indirect access |
| 1051 | // Indirect access | 1090 | if (use_unified_uniforms) { |
| 1052 | const std::string final_offset = code.GenerateTemporary(); | 1091 | return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset, |
| 1053 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); | 1092 | Visit(offset).AsUint()), |
| 1093 | Type::Uint}; | ||
| 1094 | } | ||
| 1054 | 1095 | ||
| 1055 | if (!device.HasComponentIndexingBug()) { | 1096 | const std::string final_offset = code.GenerateTemporary(); |
| 1056 | return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), | 1097 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); |
| 1057 | final_offset, final_offset), | ||
| 1058 | Type::Uint}; | ||
| 1059 | } | ||
| 1060 | 1098 | ||
| 1061 | // AMD's proprietary GLSL compiler emits ill-formed code for variable component access. | 1099 | if (!device.HasComponentIndexingBug()) { |
| 1062 | // To bypass this driver bug, generate 4 ifs, one per component. | 1100 | return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), |
| 1063 | const std::string pack = code.GenerateTemporary(); | 1101 | final_offset, final_offset), |
| 1064 | code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), | 1102 | Type::Uint}; |
| 1065 | final_offset); | ||
| 1066 | |||
| 1067 | const std::string result = code.GenerateTemporary(); | ||
| 1068 | code.AddLine("uint {};", result); | ||
| 1069 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 1070 | code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, | ||
| 1071 | pack, GetSwizzle(swizzle)); | ||
| 1072 | } | ||
| 1073 | return {result, Type::Uint}; | ||
| 1074 | } | 1103 | } |
| 1075 | 1104 | ||
| 1076 | UNREACHABLE_MSG("Unmanaged offset node type"); | 1105 | // AMD's proprietary GLSL compiler emits ill-formed code for variable component access. |
| 1106 | // To bypass this driver bug, generate 4 ifs, one per component. | ||
| 1107 | const std::string pack = code.GenerateTemporary(); | ||
| 1108 | code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), | ||
| 1109 | final_offset); | ||
| 1110 | |||
| 1111 | const std::string result = code.GenerateTemporary(); | ||
| 1112 | code.AddLine("uint {};", result); | ||
| 1113 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 1114 | code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, | ||
| 1115 | GetSwizzle(swizzle)); | ||
| 1116 | } | ||
| 1117 | return {result, Type::Uint}; | ||
| 1077 | } | 1118 | } |
| 1078 | 1119 | ||
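The unified addressing above flattens every const buffer into a single uint array, so both the direct and indirect paths reduce to the same index computation. A minimal sketch (helper name hypothetical; constants from this file):

    // Each const buffer occupies MAX_CONSTBUFFER_SCALARS consecutive uints
    // in cbufs[], so a byte offset maps to a single flat scalar index.
    constexpr u32 UnifiedScalarIndex(u32 cbuf_index, u32 byte_offset) {
        return cbuf_index * MAX_CONSTBUFFER_SCALARS + byte_offset / 4;
    }
    // Example: cbuf 1, byte offset 0x20 -> MAX_CONSTBUFFER_SCALARS + 8,
    // matching the emitted "cbufs[base + (offset >> 2)]" expression.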
| 1079 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | 1120 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { |
| @@ -1538,7 +1579,9 @@ private: | |||
| 1538 | Expression target; | 1579 | Expression target; |
| 1539 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | 1580 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { |
| 1540 | if (gpr->GetIndex() == Register::ZeroIndex) { | 1581 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 1541 | // Writing to Register::ZeroIndex is a no-op | 1582 | // Writing to Register::ZeroIndex is a no-op, but we still have to visit the source |
| 1583 | // as it might have side effects. | ||
| 1584 | code.AddLine("{};", Visit(src).GetCode()); | ||
| 1542 | return {}; | 1585 | return {}; |
| 1543 | } | 1586 | } |
| 1544 | target = {GetRegister(gpr->GetIndex()), Type::Float}; | 1587 | target = {GetRegister(gpr->GetIndex()), Type::Float}; |
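The ZeroIndex change matters for source expressions with side effects, for example an atomic whose destination is the zero register: the register write is discarded, but the operation must still execute. A C++ analogue of the new behavior (illustration only; names hypothetical):

    #include <atomic>

    void WriteToZeroRegister(std::atomic<unsigned>& gmem) {
        // Result discarded, side effect kept -- mirrors emitting the visited
        // source as a bare expression statement ("{};").
        static_cast<void>(gmem.fetch_add(1u));
    }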
| @@ -2309,6 +2352,18 @@ private: | |||
| 2309 | return {"gl_SubGroupInvocationARB", Type::Uint}; | 2352 | return {"gl_SubGroupInvocationARB", Type::Uint}; |
| 2310 | } | 2353 | } |
| 2311 | 2354 | ||
| 2355 | template <const std::string_view& comparison> | ||
| 2356 | Expression ThreadMask(Operation) { | ||
| 2357 | if (device.HasWarpIntrinsics()) { | ||
| 2358 | return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint}; | ||
| 2359 | } | ||
| 2360 | if (device.HasShaderBallot()) { | ||
| 2361 | return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint}; | ||
| 2362 | } | ||
| 2363 | LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader"); | ||
| 2364 | return {"0U", Type::Uint}; | ||
| 2365 | } | ||
| 2366 | |||
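ThreadMask is stamped out five times (Eq/Ge/Gt/Le/Lt) through a non-type template parameter of reference-to-string_view type, with the constants declared further down in the Func struct. A self-contained sketch of the pattern:

    #include <string_view>
    #include <fmt/format.h>

    inline constexpr std::string_view Eq = "Eq"; // mirrors Func::Eq

    template <const std::string_view& Comparison>
    std::string NvThreadMask() {
        return fmt::format("gl_Thread{}MaskNV", Comparison);
    }
    // NvThreadMask<Eq>() yields "gl_ThreadEqMaskNV"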
| 2312 | Expression ShuffleIndexed(Operation operation) { | 2367 | Expression ShuffleIndexed(Operation operation) { |
| 2313 | std::string value = VisitOperand(operation, 0).AsFloat(); | 2368 | std::string value = VisitOperand(operation, 0).AsFloat(); |
| 2314 | 2369 | ||
| @@ -2321,7 +2376,21 @@ private: | |||
| 2321 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; | 2376 | return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; |
| 2322 | } | 2377 | } |
| 2323 | 2378 | ||
| 2324 | Expression MemoryBarrierGL(Operation) { | 2379 | Expression Barrier(Operation) { |
| 2380 | if (!ir.IsDecompiled()) { | ||
| 2381 | LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled"); | ||
| 2382 | return {}; | ||
| 2383 | } | ||
| 2384 | code.AddLine("barrier();"); | ||
| 2385 | return {}; | ||
| 2386 | } | ||
| 2387 | |||
| 2388 | Expression MemoryBarrierGroup(Operation) { | ||
| 2389 | code.AddLine("groupMemoryBarrier();"); | ||
| 2390 | return {}; | ||
| 2391 | } | ||
| 2392 | |||
| 2393 | Expression MemoryBarrierGlobal(Operation) { | ||
| 2325 | code.AddLine("memoryBarrier();"); | 2394 | code.AddLine("memoryBarrier();"); |
| 2326 | return {}; | 2395 | return {}; |
| 2327 | } | 2396 | } |
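For readers following the rename: the former MemoryBarrierGL is split into three operations with distinct GLSL scope. Barrier() emits "barrier();", a control barrier that requires uniform control flow, hence the IsDecompiled() check; MemoryBarrierGroup() emits "groupMemoryBarrier();", which orders memory accesses within the local workgroup; and MemoryBarrierGlobal() emits "memoryBarrier();", which orders them across all invocations.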
| @@ -2337,6 +2406,12 @@ private: | |||
| 2337 | static constexpr std::string_view NotEqual = "!="; | 2406 | static constexpr std::string_view NotEqual = "!="; |
| 2338 | static constexpr std::string_view GreaterEqual = ">="; | 2407 | static constexpr std::string_view GreaterEqual = ">="; |
| 2339 | 2408 | ||
| 2409 | static constexpr std::string_view Eq = "Eq"; | ||
| 2410 | static constexpr std::string_view Ge = "Ge"; | ||
| 2411 | static constexpr std::string_view Gt = "Gt"; | ||
| 2412 | static constexpr std::string_view Le = "Le"; | ||
| 2413 | static constexpr std::string_view Lt = "Lt"; | ||
| 2414 | |||
| 2340 | static constexpr std::string_view Add = "Add"; | 2415 | static constexpr std::string_view Add = "Add"; |
| 2341 | static constexpr std::string_view Min = "Min"; | 2416 | static constexpr std::string_view Min = "Min"; |
| 2342 | static constexpr std::string_view Max = "Max"; | 2417 | static constexpr std::string_view Max = "Max"; |
| @@ -2554,9 +2629,16 @@ private: | |||
| 2554 | &GLSLDecompiler::VoteEqual, | 2629 | &GLSLDecompiler::VoteEqual, |
| 2555 | 2630 | ||
| 2556 | &GLSLDecompiler::ThreadId, | 2631 | &GLSLDecompiler::ThreadId, |
| 2632 | &GLSLDecompiler::ThreadMask<Func::Eq>, | ||
| 2633 | &GLSLDecompiler::ThreadMask<Func::Ge>, | ||
| 2634 | &GLSLDecompiler::ThreadMask<Func::Gt>, | ||
| 2635 | &GLSLDecompiler::ThreadMask<Func::Le>, | ||
| 2636 | &GLSLDecompiler::ThreadMask<Func::Lt>, | ||
| 2557 | &GLSLDecompiler::ShuffleIndexed, | 2637 | &GLSLDecompiler::ShuffleIndexed, |
| 2558 | 2638 | ||
| 2559 | &GLSLDecompiler::MemoryBarrierGL, | 2639 | &GLSLDecompiler::Barrier, |
| 2640 | &GLSLDecompiler::MemoryBarrierGroup, | ||
| 2641 | &GLSLDecompiler::MemoryBarrierGlobal, | ||
| 2560 | }; | 2642 | }; |
| 2561 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2643 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2562 | 2644 | ||
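The static_assert above is what keeps this table honest: operation_decompilers must stay in lockstep with OperationCode, entry for entry, which is why the five ThreadMask instantiations and the two extra barriers are inserted at matching positions in both lists. The pattern in isolation (names hypothetical):

    #include <array>
    #include <cstddef>

    enum class Op { ThreadId, ThreadEqMask, ShuffleIndexed, Amount };

    struct Decompiler {
        void ThreadId() {}
        void ThreadEqMask() {}
        void ShuffleIndexed() {}
    };

    using OpFunc = void (Decompiler::*)();
    constexpr std::array<OpFunc, 3> table{
        &Decompiler::ThreadId, &Decompiler::ThreadEqMask,
        &Decompiler::ShuffleIndexed};
    static_assert(table.size() == static_cast<std::size_t>(Op::Amount));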
| @@ -2669,6 +2751,7 @@ private: | |||
| 2669 | const std::string_view identifier; | 2751 | const std::string_view identifier; |
| 2670 | const std::string_view suffix; | 2752 | const std::string_view suffix; |
| 2671 | const Header header; | 2753 | const Header header; |
| 2754 | const bool use_unified_uniforms; | ||
| 2672 | std::unordered_map<u8, VaryingTFB> transform_feedback; | 2755 | std::unordered_map<u8, VaryingTFB> transform_feedback; |
| 2673 | 2756 | ||
| 2674 | ShaderWriter code; | 2757 | ShaderWriter code; |
| @@ -2864,7 +2947,7 @@ void GLSLDecompiler::DecompileAST() { | |||
| 2864 | 2947 | ||
| 2865 | } // Anonymous namespace | 2948 | } // Anonymous namespace |
| 2866 | 2949 | ||
| 2867 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { | 2950 | ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { |
| 2868 | ShaderEntries entries; | 2951 | ShaderEntries entries; |
| 2869 | for (const auto& cbuf : ir.GetConstantBuffers()) { | 2952 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 2870 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | 2953 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), |
| @@ -2885,6 +2968,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2885 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; | 2968 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; |
| 2886 | } | 2969 | } |
| 2887 | entries.shader_length = ir.GetLength(); | 2970 | entries.shader_length = ir.GetLength(); |
| 2971 | entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage); | ||
| 2888 | return entries; | 2972 | return entries; |
| 2889 | } | 2973 | } |
| 2890 | 2974 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e8a178764..451c9689a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -53,11 +53,13 @@ struct ShaderEntries { | |||
| 53 | std::vector<GlobalMemoryEntry> global_memory_entries; | 53 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 54 | std::vector<SamplerEntry> samplers; | 54 | std::vector<SamplerEntry> samplers; |
| 55 | std::vector<ImageEntry> images; | 55 | std::vector<ImageEntry> images; |
| 56 | u32 clip_distances{}; | ||
| 57 | std::size_t shader_length{}; | 56 | std::size_t shader_length{}; |
| 57 | u32 clip_distances{}; | ||
| 58 | bool use_unified_uniforms{}; | ||
| 58 | }; | 59 | }; |
| 59 | 60 | ||
| 60 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); | 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 62 | Tegra::Engines::ShaderType stage); | ||
| 61 | 63 | ||
| 62 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 64 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 63 | const VideoCommon::Shader::Registry& registry, | 65 | const VideoCommon::Shader::Registry& registry, |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9e95a122b..653c3f2f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap; | |||
| 29 | 29 | ||
| 30 | namespace { | 30 | namespace { |
| 31 | 31 | ||
| 32 | using VideoCommon::Shader::SeparateSamplerKey; | ||
| 33 | |||
| 32 | using ShaderCacheVersionHash = std::array<u8, 64>; | 34 | using ShaderCacheVersionHash = std::array<u8, 64>; |
| 33 | 35 | ||
| 34 | struct ConstBufferKey { | 36 | struct ConstBufferKey { |
| @@ -37,18 +39,26 @@ struct ConstBufferKey { | |||
| 37 | u32 value = 0; | 39 | u32 value = 0; |
| 38 | }; | 40 | }; |
| 39 | 41 | ||
| 40 | struct BoundSamplerKey { | 42 | struct BoundSamplerEntry { |
| 41 | u32 offset = 0; | 43 | u32 offset = 0; |
| 42 | Tegra::Engines::SamplerDescriptor sampler; | 44 | Tegra::Engines::SamplerDescriptor sampler; |
| 43 | }; | 45 | }; |
| 44 | 46 | ||
| 45 | struct BindlessSamplerKey { | 47 | struct SeparateSamplerEntry { |
| 48 | u32 cbuf1 = 0; | ||
| 49 | u32 cbuf2 = 0; | ||
| 50 | u32 offset1 = 0; | ||
| 51 | u32 offset2 = 0; | ||
| 52 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 53 | }; | ||
| 54 | |||
| 55 | struct BindlessSamplerEntry { | ||
| 46 | u32 cbuf = 0; | 56 | u32 cbuf = 0; |
| 47 | u32 offset = 0; | 57 | u32 offset = 0; |
| 48 | Tegra::Engines::SamplerDescriptor sampler; | 58 | Tegra::Engines::SamplerDescriptor sampler; |
| 49 | }; | 59 | }; |
| 50 | 60 | ||
| 51 | constexpr u32 NativeVersion = 20; | 61 | constexpr u32 NativeVersion = 21; |
| 52 | 62 | ||
| 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 63 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 54 | ShaderCacheVersionHash hash{}; | 64 | ShaderCacheVersionHash hash{}; |
| @@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { | |||
| 87 | u32 texture_handler_size_value; | 97 | u32 texture_handler_size_value; |
| 88 | u32 num_keys; | 98 | u32 num_keys; |
| 89 | u32 num_bound_samplers; | 99 | u32 num_bound_samplers; |
| 100 | u32 num_separate_samplers; | ||
| 90 | u32 num_bindless_samplers; | 101 | u32 num_bindless_samplers; |
| 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | 102 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || |
| 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | 103 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || |
| 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || | 104 | file.ReadArray(&texture_handler_size_value, 1) != 1 || |
| 94 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || | 105 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || |
| 95 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || | 106 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || |
| 107 | file.ReadArray(&num_separate_samplers, 1) != 1 || | ||
| 96 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | 108 | file.ReadArray(&num_bindless_samplers, 1) != 1) { |
| 97 | return false; | 109 | return false; |
| 98 | } | 110 | } |
| @@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { | |||
| 101 | } | 113 | } |
| 102 | 114 | ||
| 103 | std::vector<ConstBufferKey> flat_keys(num_keys); | 115 | std::vector<ConstBufferKey> flat_keys(num_keys); |
| 104 | std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); | 116 | std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); |
| 105 | std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); | 117 | std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); |
| 118 | std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); | ||
| 106 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || | 119 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || |
| 107 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != | 120 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != |
| 108 | flat_bound_samplers.size() || | 121 | flat_bound_samplers.size() || |
| 122 | file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) != | ||
| 123 | flat_separate_samplers.size() || | ||
| 109 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != | 124 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != |
| 110 | flat_bindless_samplers.size()) { | 125 | flat_bindless_samplers.size()) { |
| 111 | return false; | 126 | return false; |
| 112 | } | 127 | } |
| 113 | for (const auto& key : flat_keys) { | 128 | for (const auto& entry : flat_keys) { |
| 114 | keys.insert({{key.cbuf, key.offset}, key.value}); | 129 | keys.insert({{entry.cbuf, entry.offset}, entry.value}); |
| 115 | } | 130 | } |
| 116 | for (const auto& key : flat_bound_samplers) { | 131 | for (const auto& entry : flat_bound_samplers) { |
| 117 | bound_samplers.emplace(key.offset, key.sampler); | 132 | bound_samplers.emplace(entry.offset, entry.sampler); |
| 118 | } | 133 | } |
| 119 | for (const auto& key : flat_bindless_samplers) { | 134 | for (const auto& entry : flat_separate_samplers) { |
| 120 | bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | 135 | SeparateSamplerKey key; |
| 136 | key.buffers = {entry.cbuf1, entry.cbuf2}; | ||
| 137 | key.offsets = {entry.offset1, entry.offset2}; | ||
| 138 | separate_samplers.emplace(key, entry.sampler); | ||
| 139 | } | ||
| 140 | for (const auto& entry : flat_bindless_samplers) { | ||
| 141 | bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); | ||
| 121 | } | 142 | } |
| 122 | 143 | ||
| 123 | return true; | 144 | return true; |
| @@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { | |||
| 142 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || | 163 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || |
| 143 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || | 164 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || |
| 144 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | 165 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || |
| 166 | file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 || | ||
| 145 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | 167 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { |
| 146 | return false; | 168 | return false; |
| 147 | } | 169 | } |
| @@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { | |||
| 152 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | 174 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); |
| 153 | } | 175 | } |
| 154 | 176 | ||
| 155 | std::vector<BoundSamplerKey> flat_bound_samplers; | 177 | std::vector<BoundSamplerEntry> flat_bound_samplers; |
| 156 | flat_bound_samplers.reserve(bound_samplers.size()); | 178 | flat_bound_samplers.reserve(bound_samplers.size()); |
| 157 | for (const auto& [address, sampler] : bound_samplers) { | 179 | for (const auto& [address, sampler] : bound_samplers) { |
| 158 | flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); | 180 | flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); |
| 181 | } | ||
| 182 | |||
| 183 | std::vector<SeparateSamplerEntry> flat_separate_samplers; | ||
| 184 | flat_separate_samplers.reserve(separate_samplers.size()); | ||
| 185 | for (const auto& [key, sampler] : separate_samplers) { | ||
| 186 | SeparateSamplerEntry entry; | ||
| 187 | std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; | ||
| 188 | std::tie(entry.offset1, entry.offset2) = key.offsets; | ||
| 189 | entry.sampler = sampler; | ||
| 190 | flat_separate_samplers.push_back(entry); | ||
| 159 | } | 191 | } |
| 160 | 192 | ||
| 161 | std::vector<BindlessSamplerKey> flat_bindless_samplers; | 193 | std::vector<BindlessSamplerEntry> flat_bindless_samplers; |
| 162 | flat_bindless_samplers.reserve(bindless_samplers.size()); | 194 | flat_bindless_samplers.reserve(bindless_samplers.size()); |
| 163 | for (const auto& [address, sampler] : bindless_samplers) { | 195 | for (const auto& [address, sampler] : bindless_samplers) { |
| 164 | flat_bindless_samplers.push_back( | 196 | flat_bindless_samplers.push_back( |
| 165 | BindlessSamplerKey{address.first, address.second, sampler}); | 197 | BindlessSamplerEntry{address.first, address.second, sampler}); |
| 166 | } | 198 | } |
| 167 | 199 | ||
| 168 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && | 200 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && |
| 169 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == | 201 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == |
| 170 | flat_bound_samplers.size() && | 202 | flat_bound_samplers.size() && |
| 203 | file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) == | ||
| 204 | flat_separate_samplers.size() && | ||
| 171 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == | 205 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == |
| 172 | flat_bindless_samplers.size(); | 206 | flat_bindless_samplers.size(); |
| 173 | } | 207 | } |
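Two things worth noting about this cache change: the new num_separate_samplers count and entry array change the on-disk layout, which is why NativeVersion is bumped from 20 to 21 (stale caches fail to load and are rebuilt); and since ReadArray/WriteArray move these structs as raw bytes, they must stay trivially copyable. A guard one could place next to the definitions (hypothetical, not part of the patch):

    #include <type_traits>

    static_assert(std::is_trivially_copyable_v<BoundSamplerEntry>);
    static_assert(std::is_trivially_copyable_v<SeparateSamplerEntry>);
    static_assert(std::is_trivially_copyable_v<BindlessSamplerEntry>);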
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index d5be52e40..a79cef0e9 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry { | |||
| 57 | VideoCommon::Shader::ComputeInfo compute_info; | 57 | VideoCommon::Shader::ComputeInfo compute_info; |
| 58 | VideoCommon::Shader::KeyMap keys; | 58 | VideoCommon::Shader::KeyMap keys; |
| 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; |
| 60 | VideoCommon::Shader::SeparateSamplerMap separate_samplers; | ||
| 60 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | 61 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; |
| 61 | }; | 62 | }; |
| 62 | 63 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 9c7b0adbd..8e754fa90 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -6,45 +6,109 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 9 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 9 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 10 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 10 | 11 | ||
| 11 | namespace OpenGL::GLShader { | 12 | namespace OpenGL { |
| 12 | 13 | ||
| 13 | ProgramManager::ProgramManager() = default; | 14 | ProgramManager::ProgramManager(const Device& device) { |
| 15 | use_assembly_programs = device.UseAssemblyShaders(); | ||
| 16 | if (use_assembly_programs) { | ||
| 17 | glEnable(GL_COMPUTE_PROGRAM_NV); | ||
| 18 | } else { | ||
| 19 | graphics_pipeline.Create(); | ||
| 20 | glBindProgramPipeline(graphics_pipeline.handle); | ||
| 21 | } | ||
| 22 | } | ||
| 14 | 23 | ||
| 15 | ProgramManager::~ProgramManager() = default; | 24 | ProgramManager::~ProgramManager() = default; |
| 16 | 25 | ||
| 17 | void ProgramManager::Create() { | 26 | void ProgramManager::BindCompute(GLuint program) { |
| 18 | graphics_pipeline.Create(); | 27 | if (use_assembly_programs) { |
| 19 | glBindProgramPipeline(graphics_pipeline.handle); | 28 | glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); |
| 29 | } else { | ||
| 30 | is_graphics_bound = false; | ||
| 31 | glUseProgram(program); | ||
| 32 | } | ||
| 20 | } | 33 | } |
| 21 | 34 | ||
| 22 | void ProgramManager::BindGraphicsPipeline() { | 35 | void ProgramManager::BindGraphicsPipeline() { |
| 23 | if (!is_graphics_bound) { | 36 | if (use_assembly_programs) { |
| 24 | is_graphics_bound = true; | 37 | UpdateAssemblyPrograms(); |
| 25 | glUseProgram(0); | 38 | } else { |
| 39 | UpdateSourcePrograms(); | ||
| 26 | } | 40 | } |
| 41 | } | ||
| 27 | 42 | ||
| 28 | // Avoid updating the pipeline when values have not changed | 43 | void ProgramManager::BindHostPipeline(GLuint pipeline) { |
| 29 | if (old_state == current_state) { | 44 | if (use_assembly_programs) { |
| 30 | return; | 45 | if (geometry_enabled) { |
| 46 | geometry_enabled = false; | ||
| 47 | old_state.geometry = 0; | ||
| 48 | glDisable(GL_GEOMETRY_PROGRAM_NV); | ||
| 49 | } | ||
| 50 | } else { | ||
| 51 | if (!is_graphics_bound) { | ||
| 52 | glUseProgram(0); | ||
| 53 | } | ||
| 31 | } | 54 | } |
| 55 | glBindProgramPipeline(pipeline); | ||
| 56 | } | ||
| 32 | 57 | ||
| 33 | // Workaround for AMD bug | 58 | void ProgramManager::RestoreGuestPipeline() { |
| 34 | static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | | 59 | if (use_assembly_programs) { |
| 35 | GL_FRAGMENT_SHADER_BIT}; | 60 | glBindProgramPipeline(0); |
| 36 | const GLuint handle = graphics_pipeline.handle; | 61 | } else { |
| 37 | glUseProgramStages(handle, all_used_stages, 0); | 62 | glBindProgramPipeline(graphics_pipeline.handle); |
| 38 | glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); | 63 | } |
| 39 | glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); | 64 | } |
| 40 | glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); | 65 | |
| 66 | void ProgramManager::UpdateAssemblyPrograms() { | ||
| 67 | const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) { | ||
| 68 | if (current == old) { | ||
| 69 | return; | ||
| 70 | } | ||
| 71 | if (current == 0) { | ||
| 72 | if (enabled) { | ||
| 73 | enabled = false; | ||
| 74 | glDisable(stage); | ||
| 75 | } | ||
| 76 | return; | ||
| 77 | } | ||
| 78 | if (!enabled) { | ||
| 79 | enabled = true; | ||
| 80 | glEnable(stage); | ||
| 81 | } | ||
| 82 | glBindProgramARB(stage, current); | ||
| 83 | }; | ||
| 84 | |||
| 85 | update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex); | ||
| 86 | update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry, | ||
| 87 | old_state.geometry); | ||
| 88 | update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment, | ||
| 89 | old_state.fragment); | ||
| 41 | 90 | ||
| 42 | old_state = current_state; | 91 | old_state = current_state; |
| 43 | } | 92 | } |
| 44 | 93 | ||
| 45 | void ProgramManager::BindComputeShader(GLuint program) { | 94 | void ProgramManager::UpdateSourcePrograms() { |
| 46 | is_graphics_bound = false; | 95 | if (!is_graphics_bound) { |
| 47 | glUseProgram(program); | 96 | is_graphics_bound = true; |
| 97 | glUseProgram(0); | ||
| 98 | } | ||
| 99 | |||
| 100 | const GLuint handle = graphics_pipeline.handle; | ||
| 101 | const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { | ||
| 102 | if (current == old) { | ||
| 103 | return; | ||
| 104 | } | ||
| 105 | glUseProgramStages(handle, stage, current); | ||
| 106 | }; | ||
| 107 | update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); | ||
| 108 | update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); | ||
| 109 | update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); | ||
| 110 | |||
| 111 | old_state = current_state; | ||
| 48 | } | 112 | } |
| 49 | 113 | ||
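Per-draw usage implied by the new interface (sketch; the real call sites are in the rasterizer): stage handles are latched first, then BindGraphicsPipeline() diffs current_state against old_state and issues either glBindProgramARB (assembly path) or glUseProgramStages (GLSL path) only for the stages that changed.

    program_manager.UseVertexShader(vertex_handle);   // handles assumed built
    program_manager.UseGeometryShader(0);             // 0 disables the stage
    program_manager.UseFragmentShader(fragment_handle);
    program_manager.BindGraphicsPipeline();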
| 50 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | 114 | void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { |
| @@ -54,4 +118,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | |||
| 54 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; | 118 | y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; |
| 55 | } | 119 | } |
| 56 | 120 | ||
| 57 | } // namespace OpenGL::GLShader | 121 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index d2e47f2a9..0f03b4f12 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -11,7 +11,9 @@ | |||
| 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 11 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 12 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 13 | 13 | ||
| 14 | namespace OpenGL::GLShader { | 14 | namespace OpenGL { |
| 15 | |||
| 16 | class Device; | ||
| 15 | 17 | ||
| 16 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned | 18 | /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned |
| 17 | /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at | 19 | /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at |
| @@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384, | |||
| 28 | 30 | ||
| 29 | class ProgramManager { | 31 | class ProgramManager { |
| 30 | public: | 32 | public: |
| 31 | explicit ProgramManager(); | 33 | explicit ProgramManager(const Device& device); |
| 32 | ~ProgramManager(); | 34 | ~ProgramManager(); |
| 33 | 35 | ||
| 34 | void Create(); | 36 | /// Binds a compute program. |
| 37 | void BindCompute(GLuint program); | ||
| 35 | 38 | ||
| 36 | /// Updates the graphics pipeline and binds it. | 39 | /// Updates bound programs. |
| 37 | void BindGraphicsPipeline(); | 40 | void BindGraphicsPipeline(); |
| 38 | 41 | ||
| 39 | /// Binds a compute shader. | 42 | /// Binds an OpenGL pipeline object unsynchronized with the guest state. |
| 40 | void BindComputeShader(GLuint program); | 43 | void BindHostPipeline(GLuint pipeline); |
| 44 | |||
| 45 | /// Rewinds BindHostPipeline state changes. | ||
| 46 | void RestoreGuestPipeline(); | ||
| 41 | 47 | ||
| 42 | void UseVertexShader(GLuint program) { | 48 | void UseVertexShader(GLuint program) { |
| 43 | current_state.vertex_shader = program; | 49 | current_state.vertex = program; |
| 44 | } | 50 | } |
| 45 | 51 | ||
| 46 | void UseGeometryShader(GLuint program) { | 52 | void UseGeometryShader(GLuint program) { |
| 47 | current_state.geometry_shader = program; | 53 | current_state.geometry = program; |
| 48 | } | 54 | } |
| 49 | 55 | ||
| 50 | void UseFragmentShader(GLuint program) { | 56 | void UseFragmentShader(GLuint program) { |
| 51 | current_state.fragment_shader = program; | 57 | current_state.fragment = program; |
| 52 | } | 58 | } |
| 53 | 59 | ||
| 54 | private: | 60 | private: |
| 55 | struct PipelineState { | 61 | struct PipelineState { |
| 56 | bool operator==(const PipelineState& rhs) const noexcept { | 62 | GLuint vertex = 0; |
| 57 | return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && | 63 | GLuint geometry = 0; |
| 58 | geometry_shader == rhs.geometry_shader; | 64 | GLuint fragment = 0; |
| 59 | } | ||
| 60 | |||
| 61 | bool operator!=(const PipelineState& rhs) const noexcept { | ||
| 62 | return !operator==(rhs); | ||
| 63 | } | ||
| 64 | |||
| 65 | GLuint vertex_shader = 0; | ||
| 66 | GLuint fragment_shader = 0; | ||
| 67 | GLuint geometry_shader = 0; | ||
| 68 | }; | 65 | }; |
| 69 | 66 | ||
| 67 | /// Updates NV_gpu_program5 programs. | ||
| 68 | void UpdateAssemblyPrograms(); | ||
| 69 | |||
| 70 | /// Updates GLSL programs. | ||
| 71 | void UpdateSourcePrograms(); | ||
| 72 | |||
| 70 | OGLPipeline graphics_pipeline; | 73 | OGLPipeline graphics_pipeline; |
| 71 | OGLPipeline compute_pipeline; | 74 | |
| 72 | PipelineState current_state; | 75 | PipelineState current_state; |
| 73 | PipelineState old_state; | 76 | PipelineState old_state; |
| 77 | |||
| 78 | bool use_assembly_programs = false; | ||
| 79 | |||
| 74 | bool is_graphics_bound = true; | 80 | bool is_graphics_bound = true; |
| 81 | |||
| 82 | bool vertex_enabled = false; | ||
| 83 | bool geometry_enabled = false; | ||
| 84 | bool fragment_enabled = false; | ||
| 75 | }; | 85 | }; |
| 76 | 86 | ||
| 77 | } // namespace OpenGL::GLShader | 87 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 6ec328c53..932a2f69e 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -49,14 +49,6 @@ OGLStreamBuffer::~OGLStreamBuffer() { | |||
| 49 | gl_buffer.Release(); | 49 | gl_buffer.Release(); |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | GLuint OGLStreamBuffer::GetHandle() const { | ||
| 53 | return gl_buffer.handle; | ||
| 54 | } | ||
| 55 | |||
| 56 | GLsizeiptr OGLStreamBuffer::GetSize() const { | ||
| 57 | return buffer_size; | ||
| 58 | } | ||
| 59 | |||
| 60 | std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | 52 | std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { |
| 61 | ASSERT(size <= buffer_size); | 53 | ASSERT(size <= buffer_size); |
| 62 | ASSERT(alignment <= buffer_size); | 54 | ASSERT(alignment <= buffer_size); |
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index f8383cbd4..866da3594 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -17,9 +17,6 @@ public: | |||
| 17 | bool use_persistent = true); | 17 | bool use_persistent = true); |
| 18 | ~OGLStreamBuffer(); | 18 | ~OGLStreamBuffer(); |
| 19 | 19 | ||
| 20 | GLuint GetHandle() const; | ||
| 21 | GLsizeiptr GetSize() const; | ||
| 22 | |||
| 23 | /* | 20 | /* |
| 24 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | 21 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes |
| 25 | * and the optional alignment requirement. | 22 | * and the optional alignment requirement. |
| @@ -32,6 +29,14 @@ public: | |||
| 32 | 29 | ||
| 33 | void Unmap(GLsizeiptr size); | 30 | void Unmap(GLsizeiptr size); |
| 34 | 31 | ||
| 32 | GLuint Handle() const { | ||
| 33 | return gl_buffer.handle; | ||
| 34 | } | ||
| 35 | |||
| 36 | GLsizeiptr Size() const { | ||
| 37 | return buffer_size; | ||
| 38 | } | ||
| 39 | |||
| 35 | private: | 40 | private: |
| 36 | OGLBuffer gl_buffer; | 41 | OGLBuffer gl_buffer; |
| 37 | 42 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 94fbd2a22..61505879b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -35,7 +35,7 @@ MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | |||
| 35 | namespace { | 35 | namespace { |
| 36 | 36 | ||
| 37 | struct FormatTuple { | 37 | struct FormatTuple { |
| 38 | GLint internal_format; | 38 | GLenum internal_format; |
| 39 | GLenum format = GL_NONE; | 39 | GLenum format = GL_NONE; |
| 40 | GLenum type = GL_NONE; | 40 | GLenum type = GL_NONE; |
| 41 | }; | 41 | }; |
| @@ -238,6 +238,12 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte | |||
| 238 | return texture; | 238 | return texture; |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, | ||
| 242 | SwizzleSource w_source) { | ||
| 243 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 244 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 245 | } | ||
| 246 | |||
| 241 | } // Anonymous namespace | 247 | } // Anonymous namespace |
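EncodeSwizzle packs each source into its own byte (x in the high byte, w in the low), so every (x, y, z, w) combination yields a distinct u32; GetTexture() below relies on this to key its view cache. The inverse, as a hypothetical helper for illustration:

    constexpr SwizzleSource DecodeSwizzle(u32 encoded, u32 component) {
        // component 0..3 selects x..w, assuming each source fits in a byte
        return static_cast<SwizzleSource>((encoded >> (24 - component * 8)) & 0xff);
    }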
| 242 | 248 | ||
| 243 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, | 249 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, |
| @@ -257,9 +263,14 @@ CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& param | |||
| 257 | target = GetTextureTarget(params.target); | 263 | target = GetTextureTarget(params.target); |
| 258 | texture = CreateTexture(params, target, internal_format, texture_buffer); | 264 | texture = CreateTexture(params, target, internal_format, texture_buffer); |
| 259 | DecorateSurfaceName(); | 265 | DecorateSurfaceName(); |
| 260 | main_view = CreateViewInner( | 266 | |
| 261 | ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels), | 267 | u32 num_layers = 1; |
| 262 | true); | 268 | if (params.is_layered || params.target == SurfaceTarget::Texture3D) { |
| 269 | num_layers = params.depth; | ||
| 270 | } | ||
| 271 | |||
| 272 | main_view = | ||
| 273 | CreateViewInner(ViewParams(params.target, 0, num_layers, 0, params.num_levels), true); | ||
| 263 | } | 274 | } |
| 264 | 275 | ||
| 265 | CachedSurface::~CachedSurface() = default; | 276 | CachedSurface::~CachedSurface() = default; |
| @@ -381,7 +392,7 @@ void CachedSurface::DecorateSurfaceName() { | |||
| 381 | } | 392 | } |
| 382 | 393 | ||
| 383 | void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { | 394 | void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { |
| 384 | LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix); | 395 | LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); |
| 385 | } | 396 | } |
| 386 | 397 | ||
| 387 | View CachedSurface::CreateView(const ViewParams& view_key) { | 398 | View CachedSurface::CreateView(const ViewParams& view_key) { |
| @@ -397,32 +408,33 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr | |||
| 397 | } | 408 | } |
| 398 | 409 | ||
| 399 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, | 410 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, |
| 400 | const bool is_proxy) | 411 | bool is_proxy) |
| 401 | : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { | 412 | : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format}, |
| 402 | target = GetTextureTarget(params.target); | 413 | target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { |
| 403 | format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format; | ||
| 404 | if (!is_proxy) { | 414 | if (!is_proxy) { |
| 405 | texture_view = CreateTextureView(); | 415 | main_view = CreateTextureView(); |
| 406 | } | 416 | } |
| 407 | swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); | ||
| 408 | } | 417 | } |
| 409 | 418 | ||
| 410 | CachedSurfaceView::~CachedSurfaceView() = default; | 419 | CachedSurfaceView::~CachedSurfaceView() = default; |
| 411 | 420 | ||
| 412 | void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | 421 | void CachedSurfaceView::Attach(GLenum attachment, GLenum fb_target) const { |
| 413 | ASSERT(params.num_levels == 1); | 422 | ASSERT(params.num_levels == 1); |
| 414 | 423 | ||
| 424 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 425 | if (params.num_layers > 1) { | ||
| 426 | ASSERT(params.base_layer == 0); | ||
| 427 | glFramebufferTexture(fb_target, attachment, surface.texture.handle, params.base_level); | ||
| 428 | } else { | ||
| 429 | glFramebufferTexture3D(fb_target, attachment, target, surface.texture.handle, | ||
| 430 | params.base_level, params.base_layer); | ||
| 431 | } | ||
| 432 | return; | ||
| 433 | } | ||
| 434 | |||
| 415 | if (params.num_layers > 1) { | 435 | if (params.num_layers > 1) { |
| 416 | // Layered framebuffer attachments | ||
| 417 | UNIMPLEMENTED_IF(params.base_layer != 0); | 436 | UNIMPLEMENTED_IF(params.base_layer != 0); |
| 418 | 437 | glFramebufferTexture(fb_target, attachment, GetTexture(), 0); | |
| 419 | switch (params.target) { | ||
| 420 | case SurfaceTarget::Texture2DArray: | ||
| 421 | glFramebufferTexture(target, attachment, GetTexture(), 0); | ||
| 422 | break; | ||
| 423 | default: | ||
| 424 | UNIMPLEMENTED(); | ||
| 425 | } | ||
| 426 | return; | 438 | return; |
| 427 | } | 439 | } |
| 428 | 440 | ||
| @@ -430,16 +442,16 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | |||
| 430 | const GLuint texture = surface.GetTexture(); | 442 | const GLuint texture = surface.GetTexture(); |
| 431 | switch (surface.GetSurfaceParams().target) { | 443 | switch (surface.GetSurfaceParams().target) { |
| 432 | case SurfaceTarget::Texture1D: | 444 | case SurfaceTarget::Texture1D: |
| 433 | glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); | 445 | glFramebufferTexture1D(fb_target, attachment, view_target, texture, params.base_level); |
| 434 | break; | 446 | break; |
| 435 | case SurfaceTarget::Texture2D: | 447 | case SurfaceTarget::Texture2D: |
| 436 | glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); | 448 | glFramebufferTexture2D(fb_target, attachment, view_target, texture, params.base_level); |
| 437 | break; | 449 | break; |
| 438 | case SurfaceTarget::Texture1DArray: | 450 | case SurfaceTarget::Texture1DArray: |
| 439 | case SurfaceTarget::Texture2DArray: | 451 | case SurfaceTarget::Texture2DArray: |
| 440 | case SurfaceTarget::TextureCubemap: | 452 | case SurfaceTarget::TextureCubemap: |
| 441 | case SurfaceTarget::TextureCubeArray: | 453 | case SurfaceTarget::TextureCubeArray: |
| 442 | glFramebufferTextureLayer(target, attachment, texture, params.base_level, | 454 | glFramebufferTextureLayer(fb_target, attachment, texture, params.base_level, |
| 443 | params.base_layer); | 455 | params.base_layer); |
| 444 | break; | 456 | break; |
| 445 | default: | 457 | default: |
| @@ -447,35 +459,62 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | |||
| 447 | } | 459 | } |
| 448 | } | 460 | } |
| 449 | 461 | ||
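Example call under the clarified names, where fb_target selects the framebuffer binding point and the texture target is derived from the view itself (the old signature overloaded "target" for both meanings):

    view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);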
| 450 | void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, | 462 | GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, |
| 451 | SwizzleSource z_source, SwizzleSource w_source) { | 463 | SwizzleSource z_source, SwizzleSource w_source) { |
| 452 | u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | 464 | if (GetSurfaceParams().IsBuffer()) { |
| 453 | if (new_swizzle == swizzle) | 465 | return GetTexture(); |
| 454 | return; | 466 | } |
| 455 | swizzle = new_swizzle; | 467 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); |
| 456 | const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), | 468 | if (current_swizzle == new_swizzle) { |
| 457 | GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; | 469 | return current_view; |
| 458 | const GLuint handle = GetTexture(); | 470 | } |
| 459 | const PixelFormat format = surface.GetSurfaceParams().pixel_format; | 471 | current_swizzle = new_swizzle; |
| 460 | switch (format) { | 472 | |
| 473 | const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); | ||
| 474 | OGLTextureView& view = entry->second; | ||
| 475 | if (!is_cache_miss) { | ||
| 476 | current_view = view.handle; | ||
| 477 | return view.handle; | ||
| 478 | } | ||
| 479 | view = CreateTextureView(); | ||
| 480 | current_view = view.handle; | ||
| 481 | |||
| 482 | std::array swizzle{x_source, y_source, z_source, w_source}; | ||
| 483 | |||
| 484 | switch (const PixelFormat format = GetSurfaceParams().pixel_format) { | ||
| 461 | case PixelFormat::Z24S8: | 485 | case PixelFormat::Z24S8: |
| 462 | case PixelFormat::Z32FS8: | 486 | case PixelFormat::Z32FS8: |
| 463 | case PixelFormat::S8Z24: | 487 | case PixelFormat::S8Z24: |
| 464 | glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, | 488 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); |
| 489 | glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, | ||
| 465 | GetComponent(format, x_source == SwizzleSource::R)); | 490 | GetComponent(format, x_source == SwizzleSource::R)); |
| 466 | break; | 491 | |
| 467 | default: | 492 | // Make sure we sample the first component |
| 468 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); | 493 | std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { |
| 494 | return value == SwizzleSource::G ? SwizzleSource::R : value; | ||
| 495 | }); | ||
| 496 | [[fallthrough]]; | ||
| 497 | default: { | ||
| 498 | const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), | ||
| 499 | GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; | ||
| 500 | glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); | ||
| 469 | break; | 501 | break; |
| 470 | } | 502 | } |
| 503 | } | ||
| 504 | return view.handle; | ||
| 471 | } | 505 | } |
| 472 | 506 | ||
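The lookup above uses the classic try_emplace idiom: the OGLTextureView is default-constructed only on a miss, the bool reports whether insertion happened, and each distinct swizzle key builds its view exactly once. In isolation:

    auto [it, inserted] = view_cache.try_emplace(new_swizzle);
    if (inserted) {
        it->second = CreateTextureView(); // first use of this swizzle key
    }
    current_view = it->second.handle;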
| 473 | OGLTextureView CachedSurfaceView::CreateTextureView() const { | 507 | OGLTextureView CachedSurfaceView::CreateTextureView() const { |
| 474 | OGLTextureView texture_view; | 508 | OGLTextureView texture_view; |
| 475 | texture_view.Create(); | 509 | texture_view.Create(); |
| 476 | 510 | ||
| 477 | glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, | 511 | if (target == GL_TEXTURE_3D) { |
| 478 | params.num_levels, params.base_layer, params.num_layers); | 512 | glTextureView(texture_view.handle, target, surface.texture.handle, format, |
| 513 | params.base_level, params.num_levels, 0, 1); | ||
| 514 | } else { | ||
| 515 | glTextureView(texture_view.handle, target, surface.texture.handle, format, | ||
| 516 | params.base_level, params.num_levels, params.base_layer, params.num_layers); | ||
| 517 | } | ||
| 479 | ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); | 518 | ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); |
| 480 | 519 | ||
| 481 | return texture_view; | 520 | return texture_view; |
| @@ -518,8 +557,8 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||
| 518 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 557 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 519 | const auto& src_params{src_view->GetSurfaceParams()}; | 558 | const auto& src_params{src_view->GetSurfaceParams()}; |
| 520 | const auto& dst_params{dst_view->GetSurfaceParams()}; | 559 | const auto& dst_params{dst_view->GetSurfaceParams()}; |
| 521 | UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); | 560 | UNIMPLEMENTED_IF(src_params.depth != 1); |
| 522 | UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); | 561 | UNIMPLEMENTED_IF(dst_params.depth != 1); |
| 523 | 562 | ||
| 524 | state_tracker.NotifyScissor0(); | 563 | state_tracker.NotifyScissor0(); |
| 525 | state_tracker.NotifyFramebuffer(); | 564 | state_tracker.NotifyFramebuffer(); |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 02d9981a1..bfc4ddf5d 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -80,10 +80,12 @@ public: | |||
| 80 | explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); | 80 | explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); |
| 81 | ~CachedSurfaceView(); | 81 | ~CachedSurfaceView(); |
| 82 | 82 | ||
| 83 | /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER | 83 | /// @brief Attaches this texture view to the currently bound fb_target framebuffer |
| 84 | void Attach(GLenum attachment, GLenum target) const; | 84 | /// @param attachment Attachment to bind textures to |
| 85 | /// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER) | ||
| 86 | void Attach(GLenum attachment, GLenum fb_target) const; | ||
| 85 | 87 | ||
| 86 | void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, | 88 | GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, |
| 87 | Tegra::Texture::SwizzleSource y_source, | 89 | Tegra::Texture::SwizzleSource y_source, |
| 88 | Tegra::Texture::SwizzleSource z_source, | 90 | Tegra::Texture::SwizzleSource z_source, |
| 89 | Tegra::Texture::SwizzleSource w_source); | 91 | Tegra::Texture::SwizzleSource w_source); |
| @@ -98,7 +100,7 @@ public: | |||
| 98 | if (is_proxy) { | 100 | if (is_proxy) { |
| 99 | return surface.GetTexture(); | 101 | return surface.GetTexture(); |
| 100 | } | 102 | } |
| 101 | return texture_view.handle; | 103 | return main_view.handle; |
| 102 | } | 104 | } |
| 103 | 105 | ||
| 104 | GLenum GetFormat() const { | 106 | GLenum GetFormat() const { |
| @@ -110,23 +112,19 @@ public: | |||
| 110 | } | 112 | } |
| 111 | 113 | ||
| 112 | private: | 114 | private: |
| 113 | u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, | ||
| 114 | Tegra::Texture::SwizzleSource y_source, | ||
| 115 | Tegra::Texture::SwizzleSource z_source, | ||
| 116 | Tegra::Texture::SwizzleSource w_source) const { | ||
| 117 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 118 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 119 | } | ||
| 120 | |||
| 121 | OGLTextureView CreateTextureView() const; | 115 | OGLTextureView CreateTextureView() const; |
| 122 | 116 | ||
| 123 | CachedSurface& surface; | 117 | CachedSurface& surface; |
| 124 | GLenum target{}; | 118 | const GLenum format; |
| 125 | GLenum format{}; | 119 | const GLenum target; |
| 120 | const bool is_proxy; | ||
| 121 | |||
| 122 | std::unordered_map<u32, OGLTextureView> view_cache; | ||
| 123 | OGLTextureView main_view; | ||
| 126 | 124 | ||
| 127 | OGLTextureView texture_view; | 125 | // Use an invalid default so it always fails the comparison test |
| 128 | u32 swizzle{}; | 126 | u32 current_swizzle = 0xffffffff; |
| 129 | bool is_proxy{}; | 127 | GLuint current_view = 0; |
| 130 | }; | 128 | }; |
| 131 | 129 | ||
| 132 | class TextureCacheOpenGL final : public TextureCacheBase { | 130 | class TextureCacheOpenGL final : public TextureCacheBase { |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b2a179746..6214fcbc3 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -316,7 +316,7 @@ public: | |||
| 316 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, | 316 | RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, |
| 317 | Core::Frontend::GraphicsContext& context) | 317 | Core::Frontend::GraphicsContext& context) |
| 318 | : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, | 318 | : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, |
| 319 | has_debug_tool{HasDebugTool()} {} | 319 | program_manager{device}, has_debug_tool{HasDebugTool()} {} |
| 320 | 320 | ||
| 321 | RendererOpenGL::~RendererOpenGL() = default; | 321 | RendererOpenGL::~RendererOpenGL() = default; |
| 322 | 322 | ||
| @@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 468 | vertex_program.Create(true, false, vertex_shader.handle); | 468 | vertex_program.Create(true, false, vertex_shader.handle); |
| 469 | fragment_program.Create(true, false, fragment_shader.handle); | 469 | fragment_program.Create(true, false, fragment_shader.handle); |
| 470 | 470 | ||
| 471 | // Create program pipeline | 471 | pipeline.Create(); |
| 472 | program_manager.Create(); | 472 | glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); |
| 473 | glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); | ||
| 473 | 474 | ||
| 474 | // Generate VBO handle for drawing | 475 | // Generate VBO handle for drawing |
| 475 | vertex_buffer.Create(); | 476 | vertex_buffer.Create(); |
| @@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() { | |||
| 508 | if (rasterizer) { | 509 | if (rasterizer) { |
| 509 | return; | 510 | return; |
| 510 | } | 511 | } |
| 511 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, | 512 | rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info, |
| 512 | program_manager, state_tracker); | 513 | program_manager, state_tracker); |
| 513 | } | 514 | } |
| 514 | 515 | ||
| @@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 620 | state_tracker.NotifyClipControl(); | 621 | state_tracker.NotifyClipControl(); |
| 621 | state_tracker.NotifyAlphaTest(); | 622 | state_tracker.NotifyAlphaTest(); |
| 622 | 623 | ||
| 623 | program_manager.UseVertexShader(vertex_program.handle); | 624 | program_manager.BindHostPipeline(pipeline.handle); |
| 624 | program_manager.UseGeometryShader(0); | ||
| 625 | program_manager.UseFragmentShader(fragment_program.handle); | ||
| 626 | program_manager.BindGraphicsPipeline(); | ||
| 627 | 625 | ||
| 628 | glEnable(GL_CULL_FACE); | 626 | glEnable(GL_CULL_FACE); |
| 629 | if (screen_info.display_srgb) { | 627 | if (screen_info.display_srgb) { |
| @@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 665 | 663 | ||
| 666 | glClear(GL_COLOR_BUFFER_BIT); | 664 | glClear(GL_COLOR_BUFFER_BIT); |
| 667 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | 665 | glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); |
| 666 | |||
| 667 | program_manager.RestoreGuestPipeline(); | ||
| 668 | } | 668 | } |
| 669 | 669 | ||
| 670 | bool RendererOpenGL::TryPresent(int timeout_ms) { | 670 | bool RendererOpenGL::TryPresent(int timeout_ms) { |
| @@ -751,8 +751,9 @@ void RendererOpenGL::RenderScreenshot() { | |||
| 751 | } | 751 | } |
| 752 | 752 | ||
| 753 | bool RendererOpenGL::Init() { | 753 | bool RendererOpenGL::Init() { |
| 754 | if (GLAD_GL_KHR_debug) { | 754 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { |
| 755 | glEnable(GL_DEBUG_OUTPUT); | 755 | glEnable(GL_DEBUG_OUTPUT); |
| 756 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||
| 756 | glDebugMessageCallback(DebugHandler, nullptr); | 757 | glDebugMessageCallback(DebugHandler, nullptr); |
| 757 | } | 758 | } |
| 758 | 759 | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 50b647661..61bf507f4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "common/math_util.h" | 10 | #include "common/math_util.h" |
| 11 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| 12 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 13 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 14 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 15 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| @@ -95,6 +96,7 @@ private: | |||
| 95 | Core::Frontend::EmuWindow& emu_window; | 96 | Core::Frontend::EmuWindow& emu_window; |
| 96 | Core::System& system; | 97 | Core::System& system; |
| 97 | Core::Frontend::GraphicsContext& context; | 98 | Core::Frontend::GraphicsContext& context; |
| 99 | const Device device; | ||
| 98 | 100 | ||
| 99 | StateTracker state_tracker{system}; | 101 | StateTracker state_tracker{system}; |
| 100 | 102 | ||
| @@ -102,13 +104,14 @@ private: | |||
| 102 | OGLBuffer vertex_buffer; | 104 | OGLBuffer vertex_buffer; |
| 103 | OGLProgram vertex_program; | 105 | OGLProgram vertex_program; |
| 104 | OGLProgram fragment_program; | 106 | OGLProgram fragment_program; |
| 107 | OGLPipeline pipeline; | ||
| 105 | OGLFramebuffer screenshot_framebuffer; | 108 | OGLFramebuffer screenshot_framebuffer; |
| 106 | 109 | ||
| 107 | /// Display information for Switch screen | 110 | /// Display information for Switch screen |
| 108 | ScreenInfo screen_info; | 111 | ScreenInfo screen_info; |
| 109 | 112 | ||
| 110 | /// Global dummy shader pipeline | 113 | /// Global dummy shader pipeline |
| 111 | GLShader::ProgramManager program_manager; | 114 | ProgramManager program_manager; |
| 112 | 115 | ||
| 113 | /// OpenGL framebuffer data | 116 | /// OpenGL framebuffer data |
| 114 | std::vector<u8> gl_framebuffer_data; | 117 | std::vector<u8> gl_framebuffer_data; |
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 568744e3c..424278816 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { | |||
| 71 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); | 71 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); |
| 72 | 72 | ||
| 73 | u32 packed_front_face = PackFrontFace(regs.front_face); | 73 | u32 packed_front_face = PackFrontFace(regs.front_face); |
| 74 | if (regs.screen_y_control.triangle_rast_flip != 0 && | 74 | if (regs.screen_y_control.triangle_rast_flip != 0) { |
| 75 | regs.viewport_transform[0].scale_y > 0.0f) { | ||
| 76 | // Flip front face | 75 | // Flip front face |
| 77 | packed_front_face = 1 - packed_front_face; | 76 | packed_front_face = 1 - packed_front_face; |
| 78 | } | 77 | } |
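The rasterizer-state change above makes the front-face flip depend on triangle_rast_flip alone, dropping the old positive-scale_y test. A sketch of the arithmetic, assuming the front face packs into a single bit (0/1) as the PackFrontFace name and the `1 - packed` idiom suggest:

#include <cstdint>

// Assumed packing: one bit, 0 = one winding order, 1 = the other.
constexpr std::uint32_t ApplyRastFlip(std::uint32_t packed_front_face,
                                      bool triangle_rast_flip) {
    // A Y-flipped rasterizer inverts the winding of every triangle, so the
    // packed bit is toggled with `1 - x`.
    return triangle_rast_flip ? 1 - packed_front_face : packed_front_face;
}

static_assert(ApplyRastFlip(0, true) == 1 && ApplyRastFlip(1, true) == 0);
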
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 12be691a5..62e950d31 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -142,14 +142,14 @@ struct FormatTuple { | |||
| 142 | {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 | 142 | {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 |
| 143 | {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 | 143 | {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 |
| 144 | {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 | 144 | {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 |
| 145 | {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 | 145 | {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8 |
| 146 | {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F | 146 | {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F |
| 147 | {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F | 147 | {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F |
| 148 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F | 148 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F |
| 149 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F | 149 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F |
| 150 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U | 150 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U |
| 151 | {VK_FORMAT_UNDEFINED}, // R16S | 151 | {VK_FORMAT_UNDEFINED}, // R16S |
| 152 | {VK_FORMAT_UNDEFINED}, // R16UI | 152 | {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI |
| 153 | {VK_FORMAT_UNDEFINED}, // R16I | 153 | {VK_FORMAT_UNDEFINED}, // R16I |
| 154 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 | 154 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 |
| 155 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F | 155 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F |
| @@ -168,7 +168,7 @@ struct FormatTuple { | |||
| 168 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 | 168 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 |
| 169 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 | 169 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 |
| 170 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 | 170 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 |
| 171 | {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB | 171 | {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB |
| 172 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB | 172 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB |
| 173 | {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB | 173 | {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB |
| 174 | {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB | 174 | {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB |
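For context on the table edits above: each FormatTuple row pairs a VkFormat with usage bits, and this patch promotes BGRA8/BGRA8_SRGB to attachable render targets and maps R16UI to a real format instead of VK_FORMAT_UNDEFINED. A hedged sketch of the row layout (the exact flag values are assumptions):

#include <vulkan/vulkan.h>

enum : unsigned { Attachable = 1 << 0, Storage = 1 << 1 };

struct FormatTuple {
    VkFormat format;    // host format backing the guest pixel format
    unsigned usage = 0; // Attachable: usable as a framebuffer attachment;
                        // Storage: usable as a storage image
};

// The rows touched by this patch:
constexpr FormatTuple bgra8{VK_FORMAT_B8G8R8A8_UNORM, Attachable};
constexpr FormatTuple bgra8_srgb{VK_FORMAT_B8G8R8A8_SRGB, Attachable};
constexpr FormatTuple r16ui{VK_FORMAT_R16_UINT, Attachable | Storage};
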
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5b494da8c..1fde38328 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | 12 | #include "video_core/renderer_vulkan/vk_device.h" |
| 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| @@ -36,8 +37,8 @@ std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKSch | |||
| 36 | 37 | ||
| 37 | } // Anonymous namespace | 38 | } // Anonymous namespace |
| 38 | 39 | ||
| 39 | CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | 40 | Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, |
| 40 | VAddr cpu_addr, std::size_t size) | 41 | std::size_t size) |
| 41 | : VideoCommon::BufferBlock{cpu_addr, size} { | 42 | : VideoCommon::BufferBlock{cpu_addr, size} { |
| 42 | VkBufferCreateInfo ci; | 43 | VkBufferCreateInfo ci; |
| 43 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; | 44 | ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; |
| @@ -53,7 +54,7 @@ CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& me | |||
| 53 | buffer.commit = memory_manager.Commit(buffer.handle, false); | 54 | buffer.commit = memory_manager.Commit(buffer.handle, false); |
| 54 | } | 55 | } |
| 55 | 56 | ||
| 56 | CachedBufferBlock::~CachedBufferBlock() = default; | 57 | Buffer::~Buffer() = default; |
| 57 | 58 | ||
| 58 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 59 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| 59 | const VKDevice& device, VKMemoryManager& memory_manager, | 60 | const VKDevice& device, VKMemoryManager& memory_manager, |
| @@ -66,12 +67,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S | |||
| 66 | 67 | ||
| 67 | VKBufferCache::~VKBufferCache() = default; | 68 | VKBufferCache::~VKBufferCache() = default; |
| 68 | 69 | ||
| 69 | Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 70 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 70 | return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); | 71 | return std::make_shared<Buffer>(device, memory_manager, cpu_addr, size); |
| 71 | } | ||
| 72 | |||
| 73 | VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) { | ||
| 74 | return buffer->GetHandle(); | ||
| 75 | } | 72 | } |
| 76 | 73 | ||
| 77 | VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { | 74 | VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { |
| @@ -90,7 +87,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st | |||
| 90 | std::memcpy(staging.commit->Map(size), data, size); | 87 | std::memcpy(staging.commit->Map(size), data, size); |
| 91 | 88 | ||
| 92 | scheduler.RequestOutsideRenderPassOperationContext(); | 89 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 93 | scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, | 90 | scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, |
| 94 | size](vk::CommandBuffer cmdbuf) { | 91 | size](vk::CommandBuffer cmdbuf) { |
| 95 | cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); | 92 | cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); |
| 96 | 93 | ||
| @@ -113,7 +110,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 113 | u8* data) { | 110 | u8* data) { |
| 114 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); | 111 | const auto& staging = staging_pool.GetUnusedBuffer(size, true); |
| 115 | scheduler.RequestOutsideRenderPassOperationContext(); | 112 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 116 | scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, | 113 | scheduler.Record([staging = *staging.handle, buffer = buffer.Handle(), offset, |
| 117 | size](vk::CommandBuffer cmdbuf) { | 114 | size](vk::CommandBuffer cmdbuf) { |
| 118 | VkBufferMemoryBarrier barrier; | 115 | VkBufferMemoryBarrier barrier; |
| 119 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 116 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; |
| @@ -140,8 +137,8 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, | |||
| 140 | void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, | 137 | void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, |
| 141 | std::size_t dst_offset, std::size_t size) { | 138 | std::size_t dst_offset, std::size_t size) { |
| 142 | scheduler.RequestOutsideRenderPassOperationContext(); | 139 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 143 | scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, | 140 | scheduler.Record([src_buffer = src.Handle(), dst_buffer = dst.Handle(), src_offset, dst_offset, |
| 144 | dst_offset, size](vk::CommandBuffer cmdbuf) { | 141 | size](vk::CommandBuffer cmdbuf) { |
| 145 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); | 142 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); |
| 146 | 143 | ||
| 147 | std::array<VkBufferMemoryBarrier, 2> barriers; | 144 | std::array<VkBufferMemoryBarrier, 2> barriers; |
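The upload/download/copy paths above now take `const Buffer&` and call `.Handle()` directly instead of dereferencing a shared_ptr. Stripped of the scheduler and staging-pool plumbing, the recorded upload amounts to a copy followed by a barrier on the written range, roughly (raw Vulkan calls, signatures as in the Vulkan spec):

#include <vulkan/vulkan.h>

void RecordUpload(VkCommandBuffer cmdbuf, VkBuffer staging, VkBuffer buffer,
                  VkDeviceSize offset, VkDeviceSize size) {
    // Copy staging data into the device-local cached buffer.
    const VkBufferCopy copy{0, offset, size};
    vkCmdCopyBuffer(cmdbuf, staging, buffer, 1, &copy);

    // Make the transfer write visible to any later read of that range.
    VkBufferMemoryBarrier barrier{};
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.buffer = buffer;
    barrier.offset = offset;
    barrier.size = size;
    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr,
                         1, &barrier, 0, nullptr);
}
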
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index a54583e7d..9ebbef835 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 10 | #include "video_core/buffer_cache/buffer_cache.h" |
| 11 | #include "video_core/rasterizer_cache.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 11 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 13 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 12 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 14 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 13 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
| @@ -24,13 +23,13 @@ class VKDevice; | |||
| 24 | class VKMemoryManager; | 23 | class VKMemoryManager; |
| 25 | class VKScheduler; | 24 | class VKScheduler; |
| 26 | 25 | ||
| 27 | class CachedBufferBlock final : public VideoCommon::BufferBlock { | 26 | class Buffer final : public VideoCommon::BufferBlock { |
| 28 | public: | 27 | public: |
| 29 | explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | 28 | explicit Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VAddr cpu_addr, |
| 30 | VAddr cpu_addr, std::size_t size); | 29 | std::size_t size); |
| 31 | ~CachedBufferBlock(); | 30 | ~Buffer(); |
| 32 | 31 | ||
| 33 | VkBuffer GetHandle() const { | 32 | VkBuffer Handle() const { |
| 34 | return *buffer.handle; | 33 | return *buffer.handle; |
| 35 | } | 34 | } |
| 36 | 35 | ||
| @@ -38,8 +37,6 @@ private: | |||
| 38 | VKBuffer buffer; | 37 | VKBuffer buffer; |
| 39 | }; | 38 | }; |
| 40 | 39 | ||
| 41 | using Buffer = std::shared_ptr<CachedBufferBlock>; | ||
| 42 | |||
| 43 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { | 40 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { |
| 44 | public: | 41 | public: |
| 45 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, | 42 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, |
| @@ -50,9 +47,7 @@ public: | |||
| 50 | VkBuffer GetEmptyBuffer(std::size_t size) override; | 47 | VkBuffer GetEmptyBuffer(std::size_t size) override; |
| 51 | 48 | ||
| 52 | protected: | 49 | protected: |
| 53 | VkBuffer ToHandle(const Buffer& buffer) override; | 50 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 54 | |||
| 55 | Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; | ||
| 56 | 51 | ||
| 57 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 52 | void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 58 | const u8* data) override; | 53 | const u8* data) override; |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8e1b46277..281bf9ac3 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -53,8 +53,9 @@ vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const { | |||
| 53 | }; | 53 | }; |
| 54 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); | 54 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size()); |
| 55 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); | 55 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size()); |
| 56 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.texel_buffers.size()); | 56 | add_bindings(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size()); |
| 57 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); | 57 | add_bindings(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size()); |
| 58 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size()); | ||
| 58 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); | 59 | add_bindings(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size()); |
| 59 | 60 | ||
| 60 | VkDescriptorSetLayoutCreateInfo ci; | 61 | VkDescriptorSetLayoutCreateInfo ci; |
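The ordering of the add_bindings calls above is load-bearing: bindings are assigned sequentially, so the SPIR-V decompiler must emit binding decorations in the same fixed order (uniform buffers, storage buffers, uniform texels, samplers, storage texels, images). A sketch of what each add_bindings step plausibly does, one layout binding per entry:

#include <cstddef>
#include <vector>
#include <vulkan/vulkan.h>

void FillBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings,
                  uint32_t& binding, VkDescriptorType type,
                  std::size_t count) {
    for (std::size_t i = 0; i < count; ++i) {
        VkDescriptorSetLayoutBinding& entry = bindings.emplace_back();
        entry.binding = binding++; // sequential, order must match SPIR-V
        entry.descriptorType = type;
        entry.descriptorCount = 1;
        entry.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
        entry.pImmutableSamplers = nullptr;
    }
}
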
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 890fd52cf..9259b618d 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp | |||
| @@ -42,6 +42,7 @@ vk::DescriptorPool* VKDescriptorPool::AllocateNewPool() { | |||
| 42 | {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, | 42 | {VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, num_sets * 60}, |
| 43 | {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, | 43 | {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, num_sets * 64}, |
| 44 | {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, | 44 | {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, num_sets * 64}, |
| 45 | {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, num_sets * 64}, | ||
| 45 | {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; | 46 | {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, num_sets * 40}}; |
| 46 | 47 | ||
| 47 | VkDescriptorPoolCreateInfo ci; | 48 | VkDescriptorPoolCreateInfo ci; |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index f0c491d00..9fd8ac3f6 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -73,75 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType | |||
| 73 | 73 | ||
| 74 | std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | 74 | std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( |
| 75 | vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { | 75 | vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { |
| 76 | static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, | 76 | static constexpr std::array formats{ |
| 77 | VK_FORMAT_A8B8G8R8_UINT_PACK32, | 77 | VK_FORMAT_A8B8G8R8_UNORM_PACK32, |
| 78 | VK_FORMAT_A8B8G8R8_SNORM_PACK32, | 78 | VK_FORMAT_A8B8G8R8_UINT_PACK32, |
| 79 | VK_FORMAT_A8B8G8R8_SRGB_PACK32, | 79 | VK_FORMAT_A8B8G8R8_SNORM_PACK32, |
| 80 | VK_FORMAT_B5G6R5_UNORM_PACK16, | 80 | VK_FORMAT_A8B8G8R8_SRGB_PACK32, |
| 81 | VK_FORMAT_A2B10G10R10_UNORM_PACK32, | 81 | VK_FORMAT_B5G6R5_UNORM_PACK16, |
| 82 | VK_FORMAT_A1R5G5B5_UNORM_PACK16, | 82 | VK_FORMAT_A2B10G10R10_UNORM_PACK32, |
| 83 | VK_FORMAT_R32G32B32A32_SFLOAT, | 83 | VK_FORMAT_A1R5G5B5_UNORM_PACK16, |
| 84 | VK_FORMAT_R32G32B32A32_UINT, | 84 | VK_FORMAT_R32G32B32A32_SFLOAT, |
| 85 | VK_FORMAT_R32G32_SFLOAT, | 85 | VK_FORMAT_R32G32B32A32_UINT, |
| 86 | VK_FORMAT_R32G32_UINT, | 86 | VK_FORMAT_R32G32_SFLOAT, |
| 87 | VK_FORMAT_R16G16B16A16_UINT, | 87 | VK_FORMAT_R32G32_UINT, |
| 88 | VK_FORMAT_R16G16B16A16_SNORM, | 88 | VK_FORMAT_R16G16B16A16_UINT, |
| 89 | VK_FORMAT_R16G16B16A16_UNORM, | 89 | VK_FORMAT_R16G16B16A16_SNORM, |
| 90 | VK_FORMAT_R16G16_UNORM, | 90 | VK_FORMAT_R16G16B16A16_UNORM, |
| 91 | VK_FORMAT_R16G16_SNORM, | 91 | VK_FORMAT_R16G16_UNORM, |
| 92 | VK_FORMAT_R16G16_SFLOAT, | 92 | VK_FORMAT_R16G16_SNORM, |
| 93 | VK_FORMAT_R16_UNORM, | 93 | VK_FORMAT_R16G16_SFLOAT, |
| 94 | VK_FORMAT_R8G8B8A8_SRGB, | 94 | VK_FORMAT_R16_UNORM, |
| 95 | VK_FORMAT_R8G8_UNORM, | 95 | VK_FORMAT_R16_UINT, |
| 96 | VK_FORMAT_R8G8_SNORM, | 96 | VK_FORMAT_R8G8B8A8_SRGB, |
| 97 | VK_FORMAT_R8G8_UINT, | 97 | VK_FORMAT_R8G8_UNORM, |
| 98 | VK_FORMAT_R8_UNORM, | 98 | VK_FORMAT_R8G8_SNORM, |
| 99 | VK_FORMAT_R8_UINT, | 99 | VK_FORMAT_R8G8_UINT, |
| 100 | VK_FORMAT_B10G11R11_UFLOAT_PACK32, | 100 | VK_FORMAT_R8_UNORM, |
| 101 | VK_FORMAT_R32_SFLOAT, | 101 | VK_FORMAT_R8_UINT, |
| 102 | VK_FORMAT_R32_UINT, | 102 | VK_FORMAT_B10G11R11_UFLOAT_PACK32, |
| 103 | VK_FORMAT_R32_SINT, | 103 | VK_FORMAT_R32_SFLOAT, |
| 104 | VK_FORMAT_R16_SFLOAT, | 104 | VK_FORMAT_R32_UINT, |
| 105 | VK_FORMAT_R16G16B16A16_SFLOAT, | 105 | VK_FORMAT_R32_SINT, |
| 106 | VK_FORMAT_B8G8R8A8_UNORM, | 106 | VK_FORMAT_R16_SFLOAT, |
| 107 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, | 107 | VK_FORMAT_R16G16B16A16_SFLOAT, |
| 108 | VK_FORMAT_D32_SFLOAT, | 108 | VK_FORMAT_B8G8R8A8_UNORM, |
| 109 | VK_FORMAT_D16_UNORM, | 109 | VK_FORMAT_B8G8R8A8_SRGB, |
| 110 | VK_FORMAT_D16_UNORM_S8_UINT, | 110 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, |
| 111 | VK_FORMAT_D24_UNORM_S8_UINT, | 111 | VK_FORMAT_D32_SFLOAT, |
| 112 | VK_FORMAT_D32_SFLOAT_S8_UINT, | 112 | VK_FORMAT_D16_UNORM, |
| 113 | VK_FORMAT_BC1_RGBA_UNORM_BLOCK, | 113 | VK_FORMAT_D16_UNORM_S8_UINT, |
| 114 | VK_FORMAT_BC2_UNORM_BLOCK, | 114 | VK_FORMAT_D24_UNORM_S8_UINT, |
| 115 | VK_FORMAT_BC3_UNORM_BLOCK, | 115 | VK_FORMAT_D32_SFLOAT_S8_UINT, |
| 116 | VK_FORMAT_BC4_UNORM_BLOCK, | 116 | VK_FORMAT_BC1_RGBA_UNORM_BLOCK, |
| 117 | VK_FORMAT_BC5_UNORM_BLOCK, | 117 | VK_FORMAT_BC2_UNORM_BLOCK, |
| 118 | VK_FORMAT_BC5_SNORM_BLOCK, | 118 | VK_FORMAT_BC3_UNORM_BLOCK, |
| 119 | VK_FORMAT_BC7_UNORM_BLOCK, | 119 | VK_FORMAT_BC4_UNORM_BLOCK, |
| 120 | VK_FORMAT_BC6H_UFLOAT_BLOCK, | 120 | VK_FORMAT_BC5_UNORM_BLOCK, |
| 121 | VK_FORMAT_BC6H_SFLOAT_BLOCK, | 121 | VK_FORMAT_BC5_SNORM_BLOCK, |
| 122 | VK_FORMAT_BC1_RGBA_SRGB_BLOCK, | 122 | VK_FORMAT_BC7_UNORM_BLOCK, |
| 123 | VK_FORMAT_BC2_SRGB_BLOCK, | 123 | VK_FORMAT_BC6H_UFLOAT_BLOCK, |
| 124 | VK_FORMAT_BC3_SRGB_BLOCK, | 124 | VK_FORMAT_BC6H_SFLOAT_BLOCK, |
| 125 | VK_FORMAT_BC7_SRGB_BLOCK, | 125 | VK_FORMAT_BC1_RGBA_SRGB_BLOCK, |
| 126 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, | 126 | VK_FORMAT_BC2_SRGB_BLOCK, |
| 127 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, | 127 | VK_FORMAT_BC3_SRGB_BLOCK, |
| 128 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, | 128 | VK_FORMAT_BC7_SRGB_BLOCK, |
| 129 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, | 129 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, |
| 130 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, | 130 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, |
| 131 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, | 131 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, |
| 132 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, | 132 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, |
| 133 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | 133 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, |
| 134 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, | 134 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, |
| 135 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, | 135 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, |
| 136 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, | 136 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, |
| 137 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, | 137 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, |
| 138 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, | 138 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, |
| 139 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, | 139 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, |
| 140 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, | 140 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, |
| 141 | VK_FORMAT_ASTC_8x6_SRGB_BLOCK, | 141 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, |
| 142 | VK_FORMAT_ASTC_6x5_UNORM_BLOCK, | 142 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, |
| 143 | VK_FORMAT_ASTC_6x5_SRGB_BLOCK, | 143 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, |
| 144 | VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; | 144 | VK_FORMAT_ASTC_8x6_SRGB_BLOCK, |
| 145 | VK_FORMAT_ASTC_6x5_UNORM_BLOCK, | ||
| 146 | VK_FORMAT_ASTC_6x5_SRGB_BLOCK, | ||
| 147 | VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, | ||
| 148 | }; | ||
| 145 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; | 149 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; |
| 146 | for (const auto format : formats) { | 150 | for (const auto format : formats) { |
| 147 | format_properties.emplace(format, physical.GetFormatProperties(format)); | 151 | format_properties.emplace(format, physical.GetFormatProperties(format)); |
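Beyond the new R16_UINT and B8G8R8A8_SRGB entries, the hunk above is mostly a clang-format reflow of the format list. The loop that consumes it is a straightforward capability query; a minimal sketch against the raw Vulkan API:

#include <cstddef>
#include <unordered_map>
#include <vulkan/vulkan.h>

std::unordered_map<VkFormat, VkFormatProperties> QueryFormatProperties(
    VkPhysicalDevice physical, const VkFormat* formats, std::size_t count) {
    std::unordered_map<VkFormat, VkFormatProperties> properties;
    for (std::size_t i = 0; i < count; ++i) {
        // Cache each format's linear/optimal/buffer feature flags so later
        // checks never hit the driver again.
        VkFormatProperties props;
        vkGetPhysicalDeviceFormatProperties(physical, formats[i], &props);
        properties.emplace(formats[i], props);
    }
    return properties;
}
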
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 04d07fe6a..043fe7947 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | #include "video_core/fence_manager.h" | 9 | #include "video_core/fence_manager.h" |
| 10 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | 11 | #include "video_core/renderer_vulkan/wrapper.h" |
| 11 | 12 | ||
| 12 | namespace Core { | 13 | namespace Core { |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index fe45ed269..ea66e621e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "video_core/renderer_vulkan/wrapper.h" | 27 | #include "video_core/renderer_vulkan/wrapper.h" |
| 28 | #include "video_core/shader/compiler_settings.h" | 28 | #include "video_core/shader/compiler_settings.h" |
| 29 | #include "video_core/shader/memory_util.h" | 29 | #include "video_core/shader/memory_util.h" |
| 30 | #include "video_core/shader_cache.h" | ||
| 30 | 31 | ||
| 31 | namespace Vulkan { | 32 | namespace Vulkan { |
| 32 | 33 | ||
| @@ -45,6 +46,7 @@ constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; | |||
| 45 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | 46 | constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; |
| 46 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; | 47 | constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; |
| 47 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; | 48 | constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; |
| 49 | constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; | ||
| 48 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; | 50 | constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; |
| 49 | 51 | ||
| 50 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ | 52 | constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ |
| @@ -104,8 +106,9 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, | |||
| 104 | u32 binding = base_binding; | 106 | u32 binding = base_binding; |
| 105 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); | 107 | AddBindings<UNIFORM_BUFFER>(bindings, binding, flags, entries.const_buffers); |
| 106 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); | 108 | AddBindings<STORAGE_BUFFER>(bindings, binding, flags, entries.global_buffers); |
| 107 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.texel_buffers); | 109 | AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, binding, flags, entries.uniform_texels); |
| 108 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); | 110 | AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, binding, flags, entries.samplers); |
| 111 | AddBindings<STORAGE_TEXEL_BUFFER>(bindings, binding, flags, entries.storage_texels); | ||
| 109 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); | 112 | AddBindings<STORAGE_IMAGE>(bindings, binding, flags, entries.images); |
| 110 | return binding; | 113 | return binding; |
| 111 | } | 114 | } |
| @@ -130,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con | |||
| 130 | return std::memcmp(&rhs, this, sizeof *this) == 0; | 133 | return std::memcmp(&rhs, this, sizeof *this) == 0; |
| 131 | } | 134 | } |
| 132 | 135 | ||
| 133 | CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, | 136 | Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, |
| 134 | GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, | 137 | VideoCommon::Shader::ProgramCode program_code, u32 main_offset) |
| 135 | u32 main_offset) | 138 | : gpu_addr{gpu_addr}, program_code{std::move(program_code)}, |
| 136 | : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, | ||
| 137 | registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, | 139 | registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, |
| 138 | compiler_settings, registry}, | 140 | compiler_settings, registry}, |
| 139 | entries{GenerateShaderEntries(shader_ir)} {} | 141 | entries{GenerateShaderEntries(shader_ir)} {} |
| 140 | 142 | ||
| 141 | CachedShader::~CachedShader() = default; | 143 | Shader::~Shader() = default; |
| 142 | 144 | ||
| 143 | Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( | 145 | Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system, |
| 144 | Core::System& system, Tegra::Engines::ShaderType stage) { | 146 | Tegra::Engines::ShaderType stage) { |
| 145 | if (stage == Tegra::Engines::ShaderType::Compute) { | 147 | if (stage == ShaderType::Compute) { |
| 146 | return system.GPU().KeplerCompute(); | 148 | return system.GPU().KeplerCompute(); |
| 147 | } else { | 149 | } else { |
| 148 | return system.GPU().Maxwell3D(); | 150 | return system.GPU().Maxwell3D(); |
| @@ -154,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri | |||
| 154 | VKDescriptorPool& descriptor_pool, | 156 | VKDescriptorPool& descriptor_pool, |
| 155 | VKUpdateDescriptorQueue& update_descriptor_queue, | 157 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 156 | VKRenderPassCache& renderpass_cache) | 158 | VKRenderPassCache& renderpass_cache) |
| 157 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, | 159 | : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device}, |
| 158 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, | 160 | scheduler{scheduler}, descriptor_pool{descriptor_pool}, |
| 159 | renderpass_cache{renderpass_cache} {} | 161 | update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {} |
| 160 | 162 | ||
| 161 | VKPipelineCache::~VKPipelineCache() = default; | 163 | VKPipelineCache::~VKPipelineCache() = default; |
| 162 | 164 | ||
| 163 | std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | 165 | std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { |
| 164 | const auto& gpu = system.GPU().Maxwell3D(); | 166 | const auto& gpu = system.GPU().Maxwell3D(); |
| 165 | 167 | ||
| 166 | std::array<Shader, Maxwell::MaxShaderProgram> shaders; | 168 | std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; |
| 167 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 169 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 168 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | 170 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |
| 169 | 171 | ||
| @@ -176,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 176 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | 178 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; |
| 177 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 179 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 178 | ASSERT(cpu_addr); | 180 | ASSERT(cpu_addr); |
| 179 | auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; | 181 | |
| 180 | if (!shader) { | 182 | Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); |
| 183 | if (!result) { | ||
| 181 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | 184 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; |
| 182 | 185 | ||
| 183 | // No shader found - create a new one | 186 | // No shader found - create a new one |
| 184 | constexpr u32 stage_offset = STAGE_MAIN_OFFSET; | 187 | constexpr u32 stage_offset = STAGE_MAIN_OFFSET; |
| 185 | const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); | 188 | const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); |
| 186 | ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); | 189 | ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); |
| 190 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||
| 191 | |||
| 192 | auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code), | ||
| 193 | stage_offset); | ||
| 194 | result = shader.get(); | ||
| 187 | 195 | ||
| 188 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | ||
| 189 | std::move(code), stage_offset); | ||
| 190 | if (cpu_addr) { | 196 | if (cpu_addr) { |
| 191 | Register(shader); | 197 | Register(std::move(shader), *cpu_addr, size_in_bytes); |
| 192 | } else { | 198 | } else { |
| 193 | null_shader = shader; | 199 | null_shader = std::move(shader); |
| 194 | } | 200 | } |
| 195 | } | 201 | } |
| 196 | shaders[index] = std::move(shader); | 202 | shaders[index] = result; |
| 197 | } | 203 | } |
| 198 | return last_shaders = shaders; | 204 | return last_shaders = shaders; |
| 199 | } | 205 | } |
| @@ -234,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 234 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 240 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 235 | ASSERT(cpu_addr); | 241 | ASSERT(cpu_addr); |
| 236 | 242 | ||
| 237 | auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; | 243 | Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); |
| 238 | if (!shader) { | 244 | if (!shader) { |
| 239 | // No shader found - create a new one | 245 | // No shader found - create a new one |
| 240 | const auto host_ptr = memory_manager.GetPointer(program_addr); | 246 | const auto host_ptr = memory_manager.GetPointer(program_addr); |
| 241 | 247 | ||
| 242 | ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); | 248 | ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); |
| 243 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | 249 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 244 | program_addr, *cpu_addr, std::move(code), | 250 | |
| 245 | KERNEL_MAIN_OFFSET); | 251 | auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr, |
| 252 | std::move(code), KERNEL_MAIN_OFFSET); | ||
| 253 | shader = shader_info.get(); | ||
| 254 | |||
| 246 | if (cpu_addr) { | 255 | if (cpu_addr) { |
| 247 | Register(shader); | 256 | Register(std::move(shader_info), *cpu_addr, size_in_bytes); |
| 248 | } else { | 257 | } else { |
| 249 | null_kernel = shader; | 258 | null_kernel = std::move(shader_info); |
| 250 | } | 259 | } |
| 251 | } | 260 | } |
| 252 | 261 | ||
| @@ -262,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 262 | return *entry; | 271 | return *entry; |
| 263 | } | 272 | } |
| 264 | 273 | ||
| 265 | void VKPipelineCache::Unregister(const Shader& shader) { | 274 | void VKPipelineCache::OnShaderRemoval(Shader* shader) { |
| 266 | bool finished = false; | 275 | bool finished = false; |
| 267 | const auto Finish = [&] { | 276 | const auto Finish = [&] { |
| 268 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and | 277 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and |
| @@ -294,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) { | |||
| 294 | Finish(); | 303 | Finish(); |
| 295 | it = compute_cache.erase(it); | 304 | it = compute_cache.erase(it); |
| 296 | } | 305 | } |
| 297 | |||
| 298 | RasterizerCache::Unregister(shader); | ||
| 299 | } | 306 | } |
| 300 | 307 | ||
| 301 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> | 308 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> |
| @@ -312,7 +319,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 312 | ASSERT(point_size != 0.0f); | 319 | ASSERT(point_size != 0.0f); |
| 313 | } | 320 | } |
| 314 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | 321 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { |
| 315 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); | 322 | const auto& attribute = fixed_state.vertex_input.attributes[i]; |
| 323 | specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; | ||
| 324 | specialization.attribute_types[i] = attribute.Type(); | ||
| 316 | } | 325 | } |
| 317 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 326 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 318 | 327 | ||
| @@ -328,13 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 328 | } | 337 | } |
| 329 | 338 | ||
| 330 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); | 339 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); |
| 331 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | 340 | const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
| 332 | ASSERT(cpu_addr); | 341 | Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); |
| 333 | const auto shader = TryGet(*cpu_addr); | ||
| 334 | ASSERT(shader); | ||
| 335 | 342 | ||
| 336 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 343 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
| 337 | const auto program_type = GetShaderType(program_enum); | 344 | const ShaderType program_type = GetShaderType(program_enum); |
| 338 | const auto& entries = shader->GetEntries(); | 345 | const auto& entries = shader->GetEntries(); |
| 339 | program[stage] = { | 346 | program[stage] = { |
| 340 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), | 347 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), |
| @@ -376,16 +383,17 @@ void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u3 | |||
| 376 | return; | 383 | return; |
| 377 | } | 384 | } |
| 378 | 385 | ||
| 379 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER) { | 386 | if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER || |
| 380 | // Nvidia has a bug where updating multiple uniform texels at once causes the driver to | 387 | descriptor_type == STORAGE_TEXEL_BUFFER) { |
| 381 | // crash. | 388 | // Nvidia has a bug where updating multiple texels at once causes the driver to crash. |
| 389 | // Note: Fixed in driver Windows 443.24, Linux 440.66.15 | ||
| 382 | for (u32 i = 0; i < count; ++i) { | 390 | for (u32 i = 0; i < count; ++i) { |
| 383 | VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); | 391 | VkDescriptorUpdateTemplateEntry& entry = template_entries.emplace_back(); |
| 384 | entry.dstBinding = binding + i; | 392 | entry.dstBinding = binding + i; |
| 385 | entry.dstArrayElement = 0; | 393 | entry.dstArrayElement = 0; |
| 386 | entry.descriptorCount = 1; | 394 | entry.descriptorCount = 1; |
| 387 | entry.descriptorType = descriptor_type; | 395 | entry.descriptorType = descriptor_type; |
| 388 | entry.offset = offset + i * entry_size; | 396 | entry.offset = static_cast<std::size_t>(offset + i * entry_size); |
| 389 | entry.stride = entry_size; | 397 | entry.stride = entry_size; |
| 390 | } | 398 | } |
| 391 | } else if (count > 0) { | 399 | } else if (count > 0) { |
| @@ -406,8 +414,9 @@ void FillDescriptorUpdateTemplateEntries( | |||
| 406 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { | 414 | std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) { |
| 407 | AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); | 415 | AddEntry<UNIFORM_BUFFER>(template_entries, offset, binding, entries.const_buffers); |
| 408 | AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); | 416 | AddEntry<STORAGE_BUFFER>(template_entries, offset, binding, entries.global_buffers); |
| 409 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.texel_buffers); | 417 | AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, offset, binding, entries.uniform_texels); |
| 410 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); | 418 | AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, offset, binding, entries.samplers); |
| 419 | AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, offset, binding, entries.storage_texels); | ||
| 411 | AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); | 420 | AddEntry<STORAGE_IMAGE>(template_entries, offset, binding, entries.images); |
| 412 | } | 421 | } |
| 413 | 422 | ||
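The AddEntry change above extends the Nvidia workaround to storage texel buffers: instead of one descriptor-update-template entry covering `count` descriptors, affected drivers (fixed in Windows 443.24 / Linux 440.66.15 per the comment) need one entry per binding. A sketch of that per-binding expansion:

#include <cstddef>
#include <vector>
#include <vulkan/vulkan.h>

void AddTexelEntries(std::vector<VkDescriptorUpdateTemplateEntry>& entries,
                     VkDescriptorType type, uint32_t binding,
                     std::size_t offset, std::size_t entry_size,
                     uint32_t count) {
    for (uint32_t i = 0; i < count; ++i) {
        VkDescriptorUpdateTemplateEntry& entry = entries.emplace_back();
        entry.dstBinding = binding + i; // one binding per entry
        entry.dstArrayElement = 0;
        entry.descriptorCount = 1;      // never batch texel buffers here
        entry.descriptorType = type;
        entry.offset = offset + i * entry_size;
        entry.stride = entry_size;
    }
}
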
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0b5796fef..0a36e5112 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include "common/common_types.h" | 17 | #include "common/common_types.h" |
| 18 | #include "video_core/engines/const_buffer_engine_interface.h" | 18 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 19 | #include "video_core/engines/maxwell_3d.h" | 19 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/rasterizer_cache.h" | ||
| 21 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 23 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 22 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| @@ -26,6 +25,7 @@ | |||
| 26 | #include "video_core/shader/memory_util.h" | 25 | #include "video_core/shader/memory_util.h" |
| 27 | #include "video_core/shader/registry.h" | 26 | #include "video_core/shader/registry.h" |
| 28 | #include "video_core/shader/shader_ir.h" | 27 | #include "video_core/shader/shader_ir.h" |
| 28 | #include "video_core/shader_cache.h" | ||
| 29 | 29 | ||
| 30 | namespace Core { | 30 | namespace Core { |
| 31 | class System; | 31 | class System; |
| @@ -41,8 +41,6 @@ class VKFence; | |||
| 41 | class VKScheduler; | 41 | class VKScheduler; |
| 42 | class VKUpdateDescriptorQueue; | 42 | class VKUpdateDescriptorQueue; |
| 43 | 43 | ||
| 44 | class CachedShader; | ||
| 45 | using Shader = std::shared_ptr<CachedShader>; | ||
| 46 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 47 | 45 | ||
| 48 | struct GraphicsPipelineCacheKey { | 46 | struct GraphicsPipelineCacheKey { |
| @@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> { | |||
| 102 | 100 | ||
| 103 | namespace Vulkan { | 101 | namespace Vulkan { |
| 104 | 102 | ||
| 105 | class CachedShader final : public RasterizerCacheObject { | 103 | class Shader { |
| 106 | public: | 104 | public: |
| 107 | explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, | 105 | explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, |
| 108 | VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, | 106 | VideoCommon::Shader::ProgramCode program_code, u32 main_offset); |
| 109 | u32 main_offset); | 107 | ~Shader(); |
| 110 | ~CachedShader(); | ||
| 111 | 108 | ||
| 112 | GPUVAddr GetGpuAddr() const { | 109 | GPUVAddr GetGpuAddr() const { |
| 113 | return gpu_addr; | 110 | return gpu_addr; |
| 114 | } | 111 | } |
| 115 | 112 | ||
| 116 | std::size_t GetSizeInBytes() const override { | ||
| 117 | return program_code.size() * sizeof(u64); | ||
| 118 | } | ||
| 119 | |||
| 120 | VideoCommon::Shader::ShaderIR& GetIR() { | 113 | VideoCommon::Shader::ShaderIR& GetIR() { |
| 121 | return shader_ir; | 114 | return shader_ir; |
| 122 | } | 115 | } |
| @@ -144,25 +137,23 @@ private: | |||
| 144 | ShaderEntries entries; | 137 | ShaderEntries entries; |
| 145 | }; | 138 | }; |
| 146 | 139 | ||
| 147 | class VKPipelineCache final : public RasterizerCache<Shader> { | 140 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { |
| 148 | public: | 141 | public: |
| 149 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | 142 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, |
| 150 | const VKDevice& device, VKScheduler& scheduler, | 143 | const VKDevice& device, VKScheduler& scheduler, |
| 151 | VKDescriptorPool& descriptor_pool, | 144 | VKDescriptorPool& descriptor_pool, |
| 152 | VKUpdateDescriptorQueue& update_descriptor_queue, | 145 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 153 | VKRenderPassCache& renderpass_cache); | 146 | VKRenderPassCache& renderpass_cache); |
| 154 | ~VKPipelineCache(); | 147 | ~VKPipelineCache() override; |
| 155 | 148 | ||
| 156 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); | 149 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); |
| 157 | 150 | ||
| 158 | VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); | 151 | VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); |
| 159 | 152 | ||
| 160 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | 153 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); |
| 161 | 154 | ||
| 162 | protected: | 155 | protected: |
| 163 | void Unregister(const Shader& shader) override; | 156 | void OnShaderRemoval(Shader* shader) final; |
| 164 | |||
| 165 | void FlushObjectInner(const Shader& object) override {} | ||
| 166 | 157 | ||
| 167 | private: | 158 | private: |
| 168 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( | 159 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( |
| @@ -175,10 +166,10 @@ private: | |||
| 175 | VKUpdateDescriptorQueue& update_descriptor_queue; | 166 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 176 | VKRenderPassCache& renderpass_cache; | 167 | VKRenderPassCache& renderpass_cache; |
| 177 | 168 | ||
| 178 | Shader null_shader{}; | 169 | std::unique_ptr<Shader> null_shader; |
| 179 | Shader null_kernel{}; | 170 | std::unique_ptr<Shader> null_kernel; |
| 180 | 171 | ||
| 181 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 172 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; |
| 182 | 173 | ||
| 183 | GraphicsPipelineCacheKey last_graphics_key; | 174 | GraphicsPipelineCacheKey last_graphics_key; |
| 184 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; | 175 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; |
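The header changes above capture the ownership shift of the refactor: VKPipelineCache moves from RasterizerCache's shared_ptr model to the generic VideoCommon::ShaderCache, which owns shaders via unique_ptr and hands out raw, non-owning pointers, while the address-less null_shader/null_kernel stay owned by the pipeline cache itself. A sketch of that model; the TryGet/Register shapes are assumptions inferred from the diff:

#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>

template <typename T>
class ShaderCacheSketch {
public:
    T* TryGet(std::uintptr_t cpu_addr) const {
        const auto it = storage.find(cpu_addr);
        return it == storage.end() ? nullptr : it->second.get();
    }

    void Register(std::unique_ptr<T> shader, std::uintptr_t cpu_addr,
                  std::size_t size_in_bytes) {
        // The real cache also records [cpu_addr, cpu_addr + size_in_bytes)
        // so guest writes can invalidate the entry; omitted here.
        static_cast<void>(size_in_bytes);
        storage.emplace(cpu_addr, std::move(shader));
    }

private:
    std::unordered_map<std::uintptr_t, std::unique_ptr<T>> storage;
};
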
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 17a2efe8e..184b2238a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 38 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 39 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 39 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 40 | #include "video_core/renderer_vulkan/wrapper.h" | 40 | #include "video_core/renderer_vulkan/wrapper.h" |
| 41 | #include "video_core/shader_cache.h" | ||
| 41 | 42 | ||
| 42 | namespace Vulkan { | 43 | namespace Vulkan { |
| 43 | 44 | ||
| @@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { | |||
| 98 | } | 99 | } |
| 99 | 100 | ||
| 100 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | 101 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( |
| 101 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | 102 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |
| 102 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | 103 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; |
| 103 | for (std::size_t i = 0; i < std::size(addresses); ++i) { | 104 | for (std::size_t i = 0; i < std::size(addresses); ++i) { |
| 104 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | 105 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; |
| @@ -117,6 +118,17 @@ template <typename Engine, typename Entry> | |||
| 117 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 118 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| 118 | std::size_t stage, std::size_t index = 0) { | 119 | std::size_t stage, std::size_t index = 0) { |
| 119 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); | 120 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); |
| 121 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 122 | if (entry.is_separated) { | ||
| 123 | const u32 buffer_1 = entry.buffer; | ||
| 124 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 125 | const u32 offset_1 = entry.offset; | ||
| 126 | const u32 offset_2 = entry.secondary_offset; | ||
| 127 | const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); | ||
| 128 | const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); | ||
| 129 | return engine.GetTextureInfo(handle_1 | handle_2); | ||
| 130 | } | ||
| 131 | } | ||
| 120 | if (entry.is_bindless) { | 132 | if (entry.is_bindless) { |
| 121 | const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); | 133 | const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); |
| 122 | return engine.GetTextureInfo(tex_handle); | 134 | return engine.GetTextureInfo(tex_handle); |
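The new is_separated branch above handles samplers whose texture and sampler handles live in two different const-buffer words. Assuming the usual Maxwell layout, where the texture (TIC) index occupies the low bits of the combined handle and the sampler (TSC) index the high bits, the two words populate disjoint bit ranges and a bitwise OR reconstructs the full handle losslessly:

#include <cstdint>

// Sketch under the assumed disjoint-bit-field layout described above.
constexpr std::uint32_t CombineSeparatedHandle(std::uint32_t texture_word,
                                               std::uint32_t sampler_word) {
    return texture_word | sampler_word;
}
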
| @@ -468,8 +480,9 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 468 | const auto& entries = pipeline.GetEntries(); | 480 | const auto& entries = pipeline.GetEntries(); |
| 469 | SetupComputeConstBuffers(entries); | 481 | SetupComputeConstBuffers(entries); |
| 470 | SetupComputeGlobalBuffers(entries); | 482 | SetupComputeGlobalBuffers(entries); |
| 471 | SetupComputeTexelBuffers(entries); | 483 | SetupComputeUniformTexels(entries); |
| 472 | SetupComputeTextures(entries); | 484 | SetupComputeTextures(entries); |
| 485 | SetupComputeStorageTexels(entries); | ||
| 473 | SetupComputeImages(entries); | 486 | SetupComputeImages(entries); |
| 474 | 487 | ||
| 475 | buffer_cache.Unmap(); | 488 | buffer_cache.Unmap(); |
| @@ -532,14 +545,14 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 532 | return; | 545 | return; |
| 533 | } | 546 | } |
| 534 | texture_cache.OnCPUWrite(addr, size); | 547 | texture_cache.OnCPUWrite(addr, size); |
| 535 | pipeline_cache.InvalidateRegion(addr, size); | 548 | pipeline_cache.OnCPUWrite(addr, size); |
| 536 | buffer_cache.OnCPUWrite(addr, size); | 549 | buffer_cache.OnCPUWrite(addr, size); |
| 537 | query_cache.InvalidateRegion(addr, size); | ||
| 538 | } | 550 | } |
| 539 | 551 | ||
| 540 | void RasterizerVulkan::SyncGuestHost() { | 552 | void RasterizerVulkan::SyncGuestHost() { |
| 541 | texture_cache.SyncGuestHost(); | 553 | texture_cache.SyncGuestHost(); |
| 542 | buffer_cache.SyncGuestHost(); | 554 | buffer_cache.SyncGuestHost(); |
| 555 | pipeline_cache.SyncGuestHost(); | ||
| 543 | } | 556 | } |
| 544 | 557 | ||
| 545 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | 558 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { |
| @@ -715,7 +728,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( | |||
| 715 | if (!view) { | 728 | if (!view) { |
| 716 | return false; | 729 | return false; |
| 717 | } | 730 | } |
| 718 | key.views.push_back(view->GetHandle()); | 731 | key.views.push_back(view->GetAttachment()); |
| 719 | key.width = std::min(key.width, view->GetWidth()); | 732 | key.width = std::min(key.width, view->GetWidth()); |
| 720 | key.height = std::min(key.height, view->GetHeight()); | 733 | key.height = std::min(key.height, view->GetHeight()); |
| 721 | key.layers = std::min(key.layers, view->GetNumLayers()); | 734 | key.layers = std::min(key.layers, view->GetNumLayers()); |
| @@ -775,20 +788,21 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt | |||
| 775 | } | 788 | } |
| 776 | 789 | ||
| 777 | void RasterizerVulkan::SetupShaderDescriptors( | 790 | void RasterizerVulkan::SetupShaderDescriptors( |
| 778 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | 791 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |
| 779 | texture_cache.GuardSamplers(true); | 792 | texture_cache.GuardSamplers(true); |
| 780 | 793 | ||
| 781 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 794 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 782 | // Skip VertexA stage | 795 | // Skip VertexA stage |
| 783 | const auto& shader = shaders[stage + 1]; | 796 | Shader* const shader = shaders[stage + 1]; |
| 784 | if (!shader) { | 797 | if (!shader) { |
| 785 | continue; | 798 | continue; |
| 786 | } | 799 | } |
| 787 | const auto& entries = shader->GetEntries(); | 800 | const auto& entries = shader->GetEntries(); |
| 788 | SetupGraphicsConstBuffers(entries, stage); | 801 | SetupGraphicsConstBuffers(entries, stage); |
| 789 | SetupGraphicsGlobalBuffers(entries, stage); | 802 | SetupGraphicsGlobalBuffers(entries, stage); |
| 790 | SetupGraphicsTexelBuffers(entries, stage); | 803 | SetupGraphicsUniformTexels(entries, stage); |
| 791 | SetupGraphicsTextures(entries, stage); | 804 | SetupGraphicsTextures(entries, stage); |
| 805 | SetupGraphicsStorageTexels(entries, stage); | ||
| 792 | SetupGraphicsImages(entries, stage); | 806 | SetupGraphicsImages(entries, stage); |
| 793 | } | 807 | } |
| 794 | texture_cache.GuardSamplers(false); | 808 | texture_cache.GuardSamplers(false); |
| @@ -838,6 +852,10 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
| 838 | if (regs.tfb_enabled == 0) { | 852 | if (regs.tfb_enabled == 0) { |
| 839 | return; | 853 | return; |
| 840 | } | 854 | } |
| 855 | if (!device.IsExtTransformFeedbackSupported()) { | ||
| 856 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | ||
| 857 | return; | ||
| 858 | } | ||
| 841 | 859 | ||
| 842 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 860 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 843 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 861 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| @@ -866,6 +884,9 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 866 | if (regs.tfb_enabled == 0) { | 884 | if (regs.tfb_enabled == 0) { |
| 867 | return; | 885 | return; |
| 868 | } | 886 | } |
| 887 | if (!device.IsExtTransformFeedbackSupported()) { | ||
| 888 | return; | ||
| 889 | } | ||
| 869 | 890 | ||
| 870 | scheduler.Record( | 891 | scheduler.Record( |
| 871 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 892 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
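Both hunks above add the same guard: vkCmdBeginTransformFeedbackEXT/vkCmdEndTransformFeedbackEXT come from VK_EXT_transform_feedback, so recording them on a device without the extension is undefined behavior and must be skipped (with an error log on the Begin side). A sketch of the guard shape; the loaded PFN stands in for the dispatch wrapper used in the diff:

#include <vulkan/vulkan.h>

void BeginTransformFeedbackGuarded(VkCommandBuffer cmdbuf, bool tfb_enabled,
                                   bool ext_supported,
                                   PFN_vkCmdBeginTransformFeedbackEXT begin) {
    if (!tfb_enabled || !ext_supported) {
        // Without the extension, bail out instead of recording an invalid
        // command; the emulator logs an error here once.
        return;
    }
    begin(cmdbuf, 0, 0, nullptr, nullptr);
}
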
| @@ -877,14 +898,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex | |||
| 877 | 898 | ||
| 878 | for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | 899 | for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { |
| 879 | const auto& attrib = regs.vertex_attrib_format[index]; | 900 | const auto& attrib = regs.vertex_attrib_format[index]; |
| 880 | if (!attrib.IsValid()) { | 901 | if (attrib.IsConstant()) { |
| 881 | vertex_input.SetAttribute(index, false, 0, 0, {}, {}); | 902 | vertex_input.SetAttribute(index, false, 0, 0, {}, {}); |
| 882 | continue; | 903 | continue; |
| 883 | } | 904 | } |
| 884 | |||
| 885 | [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; | ||
| 886 | ASSERT(buffer.IsEnabled()); | ||
| 887 | |||
| 888 | vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), | 905 | vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), |
| 889 | attrib.size.Value()); | 906 | attrib.size.Value()); |
| 890 | } | 907 | } |
| @@ -980,12 +997,12 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, | |||
| 980 | } | 997 | } |
| 981 | } | 998 | } |
| 982 | 999 | ||
| 983 | void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { | 1000 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { |
| 984 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1001 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 985 | const auto& gpu = system.GPU().Maxwell3D(); | 1002 | const auto& gpu = system.GPU().Maxwell3D(); |
| 986 | for (const auto& entry : entries.texel_buffers) { | 1003 | for (const auto& entry : entries.uniform_texels) { |
| 987 | const auto image = GetTextureInfo(gpu, entry, stage).tic; | 1004 | const auto image = GetTextureInfo(gpu, entry, stage).tic; |
| 988 | SetupTexelBuffer(image, entry); | 1005 | SetupUniformTexels(image, entry); |
| 989 | } | 1006 | } |
| 990 | } | 1007 | } |
| 991 | 1008 | ||
| @@ -1000,6 +1017,15 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: | |||
| 1000 | } | 1017 | } |
| 1001 | } | 1018 | } |
| 1002 | 1019 | ||
| 1020 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { | ||
| 1021 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1022 | const auto& gpu = system.GPU().Maxwell3D(); | ||
| 1023 | for (const auto& entry : entries.storage_texels) { | ||
| 1024 | const auto image = GetTextureInfo(gpu, entry, stage).tic; | ||
| 1025 | SetupStorageTexel(image, entry); | ||
| 1026 | } | ||
| 1027 | } | ||
| 1028 | |||
| 1003 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { | 1029 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { |
| 1004 | MICROPROFILE_SCOPE(Vulkan_Images); | 1030 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 1005 | const auto& gpu = system.GPU().Maxwell3D(); | 1031 | const auto& gpu = system.GPU().Maxwell3D(); |
| @@ -1032,12 +1058,12 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | |||
| 1032 | } | 1058 | } |
| 1033 | } | 1059 | } |
| 1034 | 1060 | ||
| 1035 | void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { | 1061 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { |
| 1036 | MICROPROFILE_SCOPE(Vulkan_Textures); | 1062 | MICROPROFILE_SCOPE(Vulkan_Textures); |
| 1037 | const auto& gpu = system.GPU().KeplerCompute(); | 1063 | const auto& gpu = system.GPU().KeplerCompute(); |
| 1038 | for (const auto& entry : entries.texel_buffers) { | 1064 | for (const auto& entry : entries.uniform_texels) { |
| 1039 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | 1065 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; |
| 1040 | SetupTexelBuffer(image, entry); | 1066 | SetupUniformTexels(image, entry); |
| 1041 | } | 1067 | } |
| 1042 | } | 1068 | } |
| 1043 | 1069 | ||
| @@ -1052,6 +1078,15 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | |||
| 1052 | } | 1078 | } |
| 1053 | } | 1079 | } |
| 1054 | 1080 | ||
| 1081 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | ||
| 1082 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1083 | const auto& gpu = system.GPU().KeplerCompute(); | ||
| 1084 | for (const auto& entry : entries.storage_texels) { | ||
| 1085 | const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; | ||
| 1086 | SetupStorageTexel(image, entry); | ||
| 1087 | } | ||
| 1088 | } | ||
| 1089 | |||
| 1055 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | 1090 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { |
| 1056 | MICROPROFILE_SCOPE(Vulkan_Images); | 1091 | MICROPROFILE_SCOPE(Vulkan_Images); |
| 1057 | const auto& gpu = system.GPU().KeplerCompute(); | 1092 | const auto& gpu = system.GPU().KeplerCompute(); |
| @@ -1101,8 +1136,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd | |||
| 1101 | update_descriptor_queue.AddBuffer(buffer, offset, size); | 1136 | update_descriptor_queue.AddBuffer(buffer, offset, size); |
| 1102 | } | 1137 | } |
| 1103 | 1138 | ||
| 1104 | void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, | 1139 | void RasterizerVulkan::SetupUniformTexels(const Tegra::Texture::TICEntry& tic, |
| 1105 | const TexelBufferEntry& entry) { | 1140 | const UniformTexelEntry& entry) { |
| 1106 | const auto view = texture_cache.GetTextureSurface(tic, entry); | 1141 | const auto view = texture_cache.GetTextureSurface(tic, entry); |
| 1107 | ASSERT(view->IsBufferView()); | 1142 | ASSERT(view->IsBufferView()); |
| 1108 | 1143 | ||
| @@ -1114,8 +1149,8 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu | |||
| 1114 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); | 1149 | auto view = texture_cache.GetTextureSurface(texture.tic, entry); |
| 1115 | ASSERT(!view->IsBufferView()); | 1150 | ASSERT(!view->IsBufferView()); |
| 1116 | 1151 | ||
| 1117 | const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, | 1152 | const VkImageView image_view = view->GetImageView(texture.tic.x_source, texture.tic.y_source, |
| 1118 | texture.tic.z_source, texture.tic.w_source); | 1153 | texture.tic.z_source, texture.tic.w_source); |
| 1119 | const auto sampler = sampler_cache.GetSampler(texture.tsc); | 1154 | const auto sampler = sampler_cache.GetSampler(texture.tsc); |
| 1120 | update_descriptor_queue.AddSampledImage(sampler, image_view); | 1155 | update_descriptor_queue.AddSampledImage(sampler, image_view); |
| 1121 | 1156 | ||
| @@ -1124,6 +1159,14 @@ void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& textu | |||
| 1124 | sampled_views.push_back(ImageView{std::move(view), image_layout}); | 1159 | sampled_views.push_back(ImageView{std::move(view), image_layout}); |
| 1125 | } | 1160 | } |
| 1126 | 1161 | ||
| 1162 | void RasterizerVulkan::SetupStorageTexel(const Tegra::Texture::TICEntry& tic, | ||
| 1163 | const StorageTexelEntry& entry) { | ||
| 1164 | const auto view = texture_cache.GetImageSurface(tic, entry); | ||
| 1165 | ASSERT(view->IsBufferView()); | ||
| 1166 | |||
| 1167 | update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); | ||
| 1168 | } | ||
| 1169 | |||
| 1127 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { | 1170 | void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { |
| 1128 | auto view = texture_cache.GetImageSurface(tic, entry); | 1171 | auto view = texture_cache.GetImageSurface(tic, entry); |
| 1129 | 1172 | ||
| @@ -1133,7 +1176,8 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima | |||
| 1133 | 1176 | ||
| 1134 | UNIMPLEMENTED_IF(tic.IsBuffer()); | 1177 | UNIMPLEMENTED_IF(tic.IsBuffer()); |
| 1135 | 1178 | ||
| 1136 | const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | 1179 | const VkImageView image_view = |
| 1180 | view->GetImageView(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 1137 | update_descriptor_queue.AddImage(image_view); | 1181 | update_descriptor_queue.AddImage(image_view); |
| 1138 | 1182 | ||
| 1139 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); | 1183 | const auto image_layout = update_descriptor_queue.GetLastImageLayout(); |
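
The rename from "texel buffers" to "uniform texels" plus "storage texels" throughout this file mirrors Vulkan's descriptor model: uniform texels are sampled read-only through TIC entries like textures, while storage texels come from image (load/store) accesses and bind as a different descriptor type. A rough sketch of the category-to-descriptor mapping, using illustrative enums rather than the real VkDescriptorType constants:

    #include <cstdio>

    // Illustrative stand-ins; the real values are Vulkan's VkDescriptorType constants.
    enum class EntryKind { ConstBuffer, GlobalBuffer, UniformTexel, Sampler, StorageTexel, Image };
    enum class DescriptorType {
        UniformBuffer, StorageBuffer, UniformTexelBuffer,
        CombinedImageSampler, StorageTexelBuffer, StorageImage
    };

    DescriptorType ToDescriptorType(EntryKind kind) {
        switch (kind) {
        case EntryKind::ConstBuffer:  return DescriptorType::UniformBuffer;
        case EntryKind::GlobalBuffer: return DescriptorType::StorageBuffer;
        case EntryKind::UniformTexel: return DescriptorType::UniformTexelBuffer; // read-only
        case EntryKind::Sampler:      return DescriptorType::CombinedImageSampler;
        case EntryKind::StorageTexel: return DescriptorType::StorageTexelBuffer; // writable
        case EntryKind::Image:        return DescriptorType::StorageImage;
        }
        return DescriptorType::UniformBuffer; // unreachable with valid input
    }

    int main() {
        std::printf("%d\n", static_cast<int>(ToDescriptorType(EntryKind::StorageTexel)));
    }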
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 0ed0e48c6..c8c187606 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -168,7 +168,7 @@ private: | |||
| 168 | bool is_indexed, bool is_instanced); | 168 | bool is_indexed, bool is_instanced); |
| 169 | 169 | ||
| 170 | /// Setup descriptors in the graphics pipeline. | 170 | /// Setup descriptors in the graphics pipeline. |
| 171 | void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); | 171 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); |
| 172 | 172 | ||
| 173 | void SetupImageTransitions(Texceptions texceptions, | 173 | void SetupImageTransitions(Texceptions texceptions, |
| 174 | const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | 174 | const std::array<View, Maxwell::NumRenderTargets>& color_attachments, |
| @@ -193,12 +193,15 @@ private: | |||
| 193 | /// Setup global buffers in the graphics pipeline. | 193 | /// Setup global buffers in the graphics pipeline. |
| 194 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | 194 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); |
| 195 | 195 | ||
| 196 | /// Setup texel buffers in the graphics pipeline. | 196 | /// Setup uniform texels in the graphics pipeline. |
| 197 | void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); | 197 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); |
| 198 | 198 | ||
| 199 | /// Setup textures in the graphics pipeline. | 199 | /// Setup textures in the graphics pipeline. |
| 200 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); | 200 | void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); |
| 201 | 201 | ||
| 202 | /// Setup storage texels in the graphics pipeline. | ||
| 203 | void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage); | ||
| 204 | |||
| 202 | /// Setup images in the graphics pipeline. | 205 | /// Setup images in the graphics pipeline. |
| 203 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | 206 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); |
| 204 | 207 | ||
| @@ -209,11 +212,14 @@ private: | |||
| 209 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); | 212 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); |
| 210 | 213 | ||
| 211 | /// Setup texel buffers in the compute pipeline. | 214 | /// Setup texel buffers in the compute pipeline. |
| 212 | void SetupComputeTexelBuffers(const ShaderEntries& entries); | 215 | void SetupComputeUniformTexels(const ShaderEntries& entries); |
| 213 | 216 | ||
| 214 | /// Setup textures in the compute pipeline. | 217 | /// Setup textures in the compute pipeline. |
| 215 | void SetupComputeTextures(const ShaderEntries& entries); | 218 | void SetupComputeTextures(const ShaderEntries& entries); |
| 216 | 219 | ||
| 220 | /// Setup storage texels in the compute pipeline. | ||
| 221 | void SetupComputeStorageTexels(const ShaderEntries& entries); | ||
| 222 | |||
| 217 | /// Setup images in the compute pipeline. | 223 | /// Setup images in the compute pipeline. |
| 218 | void SetupComputeImages(const ShaderEntries& entries); | 224 | void SetupComputeImages(const ShaderEntries& entries); |
| 219 | 225 | ||
| @@ -222,10 +228,12 @@ private: | |||
| 222 | 228 | ||
| 223 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | 229 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); |
| 224 | 230 | ||
| 225 | void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); | 231 | void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry); |
| 226 | 232 | ||
| 227 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); | 233 | void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); |
| 228 | 234 | ||
| 235 | void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry); | ||
| 236 | |||
| 229 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | 237 | void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
| 230 | 238 | ||
| 231 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 239 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 167e20e91..97429cc59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -400,8 +400,9 @@ private: | |||
| 400 | u32 binding = specialization.base_binding; | 400 | u32 binding = specialization.base_binding; |
| 401 | binding = DeclareConstantBuffers(binding); | 401 | binding = DeclareConstantBuffers(binding); |
| 402 | binding = DeclareGlobalBuffers(binding); | 402 | binding = DeclareGlobalBuffers(binding); |
| 403 | binding = DeclareTexelBuffers(binding); | 403 | binding = DeclareUniformTexels(binding); |
| 404 | binding = DeclareSamplers(binding); | 404 | binding = DeclareSamplers(binding); |
| 405 | binding = DeclareStorageTexels(binding); | ||
| 405 | binding = DeclareImages(binding); | 406 | binding = DeclareImages(binding); |
| 406 | 407 | ||
| 407 | const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); | 408 | const Id main = OpFunction(t_void, {}, TypeFunction(t_void)); |
| @@ -515,6 +516,16 @@ private: | |||
| 515 | void DeclareCommon() { | 516 | void DeclareCommon() { |
| 516 | thread_id = | 517 | thread_id = |
| 517 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); | 518 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); |
| 519 | thread_masks[0] = | ||
| 520 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask"); | ||
| 521 | thread_masks[1] = | ||
| 522 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask"); | ||
| 523 | thread_masks[2] = | ||
| 524 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask"); | ||
| 525 | thread_masks[3] = | ||
| 526 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask"); | ||
| 527 | thread_masks[4] = | ||
| 528 | DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask"); | ||
| 518 | } | 529 | } |
| 519 | 530 | ||
| 520 | void DeclareVertex() { | 531 | void DeclareVertex() { |
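
SPIR-V exposes SubgroupEqMask and its relatives as four-component uvec4 built-ins so they can describe subgroups up to 128 invocations wide; that is why the declarations above use t_in_uint4, and the ThreadMask helper further down reads only component 0 under the (noted) assumption of a 32-wide warp. For a 32-wide warp each mask is a pure function of the lane index, as this self-contained sketch shows:

    #include <cstdint>
    #include <cstdio>

    // Lane masks for a 32-wide warp; `lane` is this thread's index within the warp.
    struct LaneMasks { std::uint32_t eq, ge, gt, le, lt; };

    LaneMasks ComputeLaneMasks(std::uint32_t lane) {
        const std::uint32_t eq = 1u << lane; // Only this lane's bit
        const std::uint32_t lt = eq - 1;     // All lanes strictly below
        return LaneMasks{eq, ~lt, ~(lt | eq), lt | eq, lt}; // ge, gt, le derived from eq/lt
    }

    int main() {
        const LaneMasks m = ComputeLaneMasks(3);
        std::printf("eq=%08x ge=%08x gt=%08x le=%08x lt=%08x\n", m.eq, m.ge, m.gt, m.le, m.lt);
    }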
| @@ -731,8 +742,10 @@ private: | |||
| 731 | if (!IsGenericAttribute(index)) { | 742 | if (!IsGenericAttribute(index)) { |
| 732 | continue; | 743 | continue; |
| 733 | } | 744 | } |
| 734 | |||
| 735 | const u32 location = GetGenericAttributeLocation(index); | 745 | const u32 location = GetGenericAttributeLocation(index); |
| 746 | if (!IsAttributeEnabled(location)) { | ||
| 747 | continue; | ||
| 748 | } | ||
| 736 | const auto type_descriptor = GetAttributeType(location); | 749 | const auto type_descriptor = GetAttributeType(location); |
| 737 | Id type; | 750 | Id type; |
| 738 | if (IsInputAttributeArray()) { | 751 | if (IsInputAttributeArray()) { |
| @@ -877,7 +890,7 @@ private: | |||
| 877 | return binding; | 890 | return binding; |
| 878 | } | 891 | } |
| 879 | 892 | ||
| 880 | u32 DeclareTexelBuffers(u32 binding) { | 893 | u32 DeclareUniformTexels(u32 binding) { |
| 881 | for (const auto& sampler : ir.GetSamplers()) { | 894 | for (const auto& sampler : ir.GetSamplers()) { |
| 882 | if (!sampler.is_buffer) { | 895 | if (!sampler.is_buffer) { |
| 883 | continue; | 896 | continue; |
| @@ -898,7 +911,7 @@ private: | |||
| 898 | Decorate(id, spv::Decoration::Binding, binding++); | 911 | Decorate(id, spv::Decoration::Binding, binding++); |
| 899 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | 912 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); |
| 900 | 913 | ||
| 901 | texel_buffers.emplace(sampler.index, TexelBuffer{image_type, id}); | 914 | uniform_texels.emplace(sampler.index, TexelBuffer{image_type, id}); |
| 902 | } | 915 | } |
| 903 | return binding; | 916 | return binding; |
| 904 | } | 917 | } |
| @@ -933,31 +946,48 @@ private: | |||
| 933 | return binding; | 946 | return binding; |
| 934 | } | 947 | } |
| 935 | 948 | ||
| 936 | u32 DeclareImages(u32 binding) { | 949 | u32 DeclareStorageTexels(u32 binding) { |
| 937 | for (const auto& image : ir.GetImages()) { | 950 | for (const auto& image : ir.GetImages()) { |
| 938 | const auto [dim, arrayed] = GetImageDim(image); | 951 | if (image.type != Tegra::Shader::ImageType::TextureBuffer) { |
| 939 | constexpr int depth = 0; | 952 | continue; |
| 940 | constexpr bool ms = false; | ||
| 941 | constexpr int sampled = 2; // This won't be accessed with a sampler | ||
| 942 | constexpr auto format = spv::ImageFormat::Unknown; | ||
| 943 | const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); | ||
| 944 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); | ||
| 945 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 946 | AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); | ||
| 947 | |||
| 948 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 949 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 950 | if (image.is_read && !image.is_written) { | ||
| 951 | Decorate(id, spv::Decoration::NonWritable); | ||
| 952 | } else if (image.is_written && !image.is_read) { | ||
| 953 | Decorate(id, spv::Decoration::NonReadable); | ||
| 954 | } | 953 | } |
| 954 | DeclareImage(image, binding); | ||
| 955 | } | ||
| 956 | return binding; | ||
| 957 | } | ||
| 955 | 958 | ||
| 956 | images.emplace(image.index, StorageImage{image_type, id}); | 959 | u32 DeclareImages(u32 binding) { |
| 960 | for (const auto& image : ir.GetImages()) { | ||
| 961 | if (image.type == Tegra::Shader::ImageType::TextureBuffer) { | ||
| 962 | continue; | ||
| 963 | } | ||
| 964 | DeclareImage(image, binding); | ||
| 957 | } | 965 | } |
| 958 | return binding; | 966 | return binding; |
| 959 | } | 967 | } |
| 960 | 968 | ||
| 969 | void DeclareImage(const Image& image, u32& binding) { | ||
| 970 | const auto [dim, arrayed] = GetImageDim(image); | ||
| 971 | constexpr int depth = 0; | ||
| 972 | constexpr bool ms = false; | ||
| 973 | constexpr int sampled = 2; // This won't be accessed with a sampler | ||
| 974 | const auto format = image.is_atomic ? spv::ImageFormat::R32ui : spv::ImageFormat::Unknown; | ||
| 975 | const Id image_type = TypeImage(t_uint, dim, depth, arrayed, ms, sampled, format, {}); | ||
| 976 | const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, image_type); | ||
| 977 | const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); | ||
| 978 | AddGlobalVariable(Name(id, fmt::format("image_{}", image.index))); | ||
| 979 | |||
| 980 | Decorate(id, spv::Decoration::Binding, binding++); | ||
| 981 | Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); | ||
| 982 | if (image.is_read && !image.is_written) { | ||
| 983 | Decorate(id, spv::Decoration::NonWritable); | ||
| 984 | } else if (image.is_written && !image.is_read) { | ||
| 985 | Decorate(id, spv::Decoration::NonReadable); | ||
| 986 | } | ||
| 987 | |||
| 988 | images.emplace(image.index, StorageImage{image_type, id}); | ||
| 989 | } | ||
| 990 | |||
| 961 | bool IsRenderTargetEnabled(u32 rt) const { | 991 | bool IsRenderTargetEnabled(u32 rt) const { |
| 962 | for (u32 component = 0; component < 4; ++component) { | 992 | for (u32 component = 0; component < 4; ++component) { |
| 963 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { | 993 | if (header.ps.IsColorComponentOutputEnabled(rt, component)) { |
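
The refactor above splits ir.GetImages() into two passes over a single shared DeclareImage helper: TextureBuffer images become storage texels, everything else stays a storage image. It also gives atomic images a concrete R32ui format, since SPIR-V image atomics effectively require a known 32-bit integer texel format, while plain loads and stores may keep format Unknown. A toy sketch of the two-pass split with stand-in types:

    #include <cstdio>
    #include <vector>

    enum class ImageType { Texture2D, TextureBuffer };
    struct Image { ImageType type; int index; bool is_atomic; };

    // Mirrors the split above: buffer images are declared in one pass,
    // non-buffer images in another, both through the same helper logic.
    unsigned DeclareImagesOfKind(const std::vector<Image>& images, bool want_buffers,
                                 unsigned binding) {
        for (const Image& image : images) {
            const bool is_buffer = image.type == ImageType::TextureBuffer;
            if (is_buffer != want_buffers) {
                continue;
            }
            // Atomic images need a concrete format (R32ui); others can stay Unknown.
            std::printf("image_%d -> binding %u (%s)\n", image.index, binding++,
                        image.is_atomic ? "R32ui" : "Unknown");
        }
        return binding;
    }

    int main() {
        const std::vector<Image> images{{ImageType::TextureBuffer, 0, false},
                                        {ImageType::Texture2D, 1, true}};
        unsigned binding = 0;
        binding = DeclareImagesOfKind(images, true, binding);  // storage texels first
        binding = DeclareImagesOfKind(images, false, binding); // then storage images
    }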
| @@ -976,6 +1006,10 @@ private: | |||
| 976 | return stage == ShaderType::TesselationControl; | 1006 | return stage == ShaderType::TesselationControl; |
| 977 | } | 1007 | } |
| 978 | 1008 | ||
| 1009 | bool IsAttributeEnabled(u32 location) const { | ||
| 1010 | return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; | ||
| 1011 | } | ||
| 1012 | |||
| 979 | u32 GetNumInputVertices() const { | 1013 | u32 GetNumInputVertices() const { |
| 980 | switch (stage) { | 1014 | switch (stage) { |
| 981 | case ShaderType::Geometry: | 1015 | case ShaderType::Geometry: |
| @@ -1071,8 +1105,7 @@ private: | |||
| 1071 | 1105 | ||
| 1072 | void VisitBasicBlock(const NodeBlock& bb) { | 1106 | void VisitBasicBlock(const NodeBlock& bb) { |
| 1073 | for (const auto& node : bb) { | 1107 | for (const auto& node : bb) { |
| 1074 | [[maybe_unused]] const Type type = Visit(node).type; | 1108 | Visit(node); |
| 1075 | ASSERT(type == Type::Void); | ||
| 1076 | } | 1109 | } |
| 1077 | } | 1110 | } |
| 1078 | 1111 | ||
| @@ -1192,16 +1225,20 @@ private: | |||
| 1192 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | 1225 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); |
| 1193 | return {v_float_zero, Type::Float}; | 1226 | return {v_float_zero, Type::Float}; |
| 1194 | default: | 1227 | default: |
| 1195 | if (IsGenericAttribute(attribute)) { | 1228 | if (!IsGenericAttribute(attribute)) { |
| 1196 | const u32 location = GetGenericAttributeLocation(attribute); | 1229 | break; |
| 1197 | const auto type_descriptor = GetAttributeType(location); | ||
| 1198 | const Type type = type_descriptor.type; | ||
| 1199 | const Id attribute_id = input_attributes.at(attribute); | ||
| 1200 | const std::vector elements = {element}; | ||
| 1201 | const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); | ||
| 1202 | return {OpLoad(GetTypeDefinition(type), pointer), type}; | ||
| 1203 | } | 1230 | } |
| 1204 | break; | 1231 | const u32 location = GetGenericAttributeLocation(attribute); |
| 1232 | if (!IsAttributeEnabled(location)) { | ||
| 1233 | // Disabled attributes (also known as constant attributes) always return zero. | ||
| 1234 | return {v_float_zero, Type::Float}; | ||
| 1235 | } | ||
| 1236 | const auto type_descriptor = GetAttributeType(location); | ||
| 1237 | const Type type = type_descriptor.type; | ||
| 1238 | const Id attribute_id = input_attributes.at(attribute); | ||
| 1239 | const std::vector elements = {element}; | ||
| 1240 | const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); | ||
| 1241 | return {OpLoad(GetTypeDefinition(type), pointer), type}; | ||
| 1205 | } | 1242 | } |
| 1206 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | 1243 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); |
| 1207 | return {v_float_zero, Type::Float}; | 1244 | return {v_float_zero, Type::Float}; |
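
With the new IsAttributeEnabled path, a read of a disabled (constant) vertex attribute folds to the literal v_float_zero at decompile time instead of loading a declared input that may not exist in the bound pipeline. The behavior in miniature, with enabled_attributes standing in for the bitset added to Specialization in the header below:

    #include <bitset>
    #include <cstdio>

    constexpr std::size_t NumVertexAttributes = 32;

    struct Specialization { std::bitset<NumVertexAttributes> enabled_attributes; };

    float ReadAttribute(const Specialization& spec, std::size_t location, float loaded_value) {
        // Disabled (constant) attributes always return zero, per the comment above.
        return spec.enabled_attributes[location] ? loaded_value : 0.0f;
    }

    int main() {
        Specialization spec;
        spec.enabled_attributes.set(0);
        std::printf("%f %f\n", ReadAttribute(spec, 0, 1.5f), ReadAttribute(spec, 1, 1.5f));
    }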
| @@ -1237,7 +1274,7 @@ private: | |||
| 1237 | } else { | 1274 | } else { |
| 1238 | UNREACHABLE_MSG("Unmanaged offset node type"); | 1275 | UNREACHABLE_MSG("Unmanaged offset node type"); |
| 1239 | } | 1276 | } |
| 1240 | pointer = OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), buffer_index, | 1277 | pointer = OpAccessChain(t_cbuf_float, buffer_id, v_uint_zero, buffer_index, |
| 1241 | buffer_element); | 1278 | buffer_element); |
| 1242 | } | 1279 | } |
| 1243 | return {OpLoad(t_float, pointer), Type::Float}; | 1280 | return {OpLoad(t_float, pointer), Type::Float}; |
| @@ -1362,7 +1399,9 @@ private: | |||
| 1362 | Expression target{}; | 1399 | Expression target{}; |
| 1363 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { | 1400 | if (const auto gpr = std::get_if<GprNode>(&*dest)) { |
| 1364 | if (gpr->GetIndex() == Register::ZeroIndex) { | 1401 | if (gpr->GetIndex() == Register::ZeroIndex) { |
| 1365 | // Writing to Register::ZeroIndex is a no op | 1402 | // Writing to Register::ZeroIndex is a no op but we still have to visit its source |
| 1403 | // because it might have side effects. | ||
| 1404 | Visit(src); | ||
| 1366 | return {}; | 1405 | return {}; |
| 1367 | } | 1406 | } |
| 1368 | target = {registers.at(gpr->GetIndex()), Type::Float}; | 1407 | target = {registers.at(gpr->GetIndex()), Type::Float}; |
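
The RZ fix above is subtle: an instruction that targets the zero register still executes, so its source expression can carry side effects (an atomic operation, say) that must be emitted even though the written value is discarded. A CPU-side analogy of "discard the write, keep the evaluation", with hypothetical names:

    #include <cstdio>
    #include <functional>

    int g_atomic_counter = 0;

    // Stand-in for a source operand whose evaluation has a side effect.
    int SideEffectfulSource() { return ++g_atomic_counter; }

    void StoreRegister(int index, const std::function<int()>& source) {
        constexpr int ZeroIndex = 255; // RZ
        if (index == ZeroIndex) {
            source(); // Discard the value, but keep the side effect (the fixed behavior).
            return;
        }
        std::printf("r%d = %d\n", index, source());
    }

    int main() {
        StoreRegister(255, SideEffectfulSource);
        StoreRegister(0, SideEffectfulSource);
        std::printf("atomic counter = %d\n", g_atomic_counter); // 2, not 1
    }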
| @@ -1590,7 +1629,7 @@ private: | |||
| 1590 | 1629 | ||
| 1591 | const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); | 1630 | const Id result = OpIAddCarry(TypeStruct({t_uint, t_uint}), op_a, op_b); |
| 1592 | const Id carry = OpCompositeExtract(t_uint, result, 1); | 1631 | const Id carry = OpCompositeExtract(t_uint, result, 1); |
| 1593 | return {OpINotEqual(t_bool, carry, Constant(t_uint, 0)), Type::Bool}; | 1632 | return {OpINotEqual(t_bool, carry, v_uint_zero), Type::Bool}; |
| 1594 | } | 1633 | } |
| 1595 | 1634 | ||
| 1596 | Expression LogicalAssign(Operation operation) { | 1635 | Expression LogicalAssign(Operation operation) { |
| @@ -1653,7 +1692,7 @@ private: | |||
| 1653 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); | 1692 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); |
| 1654 | const u32 index = meta.sampler.index; | 1693 | const u32 index = meta.sampler.index; |
| 1655 | if (meta.sampler.is_buffer) { | 1694 | if (meta.sampler.is_buffer) { |
| 1656 | const auto& entry = texel_buffers.at(index); | 1695 | const auto& entry = uniform_texels.at(index); |
| 1657 | return OpLoad(entry.image_type, entry.image); | 1696 | return OpLoad(entry.image_type, entry.image); |
| 1658 | } else { | 1697 | } else { |
| 1659 | const auto& entry = sampled_images.at(index); | 1698 | const auto& entry = sampled_images.at(index); |
| @@ -1930,39 +1969,20 @@ private: | |||
| 1930 | return {}; | 1969 | return {}; |
| 1931 | } | 1970 | } |
| 1932 | 1971 | ||
| 1933 | Expression AtomicImageAdd(Operation operation) { | 1972 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> |
| 1934 | UNIMPLEMENTED(); | 1973 | Expression AtomicImage(Operation operation) { |
| 1935 | return {}; | 1974 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1936 | } | 1975 | ASSERT(meta.values.size() == 1); |
| 1937 | |||
| 1938 | Expression AtomicImageMin(Operation operation) { | ||
| 1939 | UNIMPLEMENTED(); | ||
| 1940 | return {}; | ||
| 1941 | } | ||
| 1942 | |||
| 1943 | Expression AtomicImageMax(Operation operation) { | ||
| 1944 | UNIMPLEMENTED(); | ||
| 1945 | return {}; | ||
| 1946 | } | ||
| 1947 | |||
| 1948 | Expression AtomicImageAnd(Operation operation) { | ||
| 1949 | UNIMPLEMENTED(); | ||
| 1950 | return {}; | ||
| 1951 | } | ||
| 1952 | |||
| 1953 | Expression AtomicImageOr(Operation operation) { | ||
| 1954 | UNIMPLEMENTED(); | ||
| 1955 | return {}; | ||
| 1956 | } | ||
| 1957 | 1976 | ||
| 1958 | Expression AtomicImageXor(Operation operation) { | 1977 | const Id coordinate = GetCoordinates(operation, Type::Int); |
| 1959 | UNIMPLEMENTED(); | 1978 | const Id image = images.at(meta.image.index).image; |
| 1960 | return {}; | 1979 | const Id sample = v_uint_zero; |
| 1961 | } | 1980 | const Id pointer = OpImageTexelPointer(t_image_uint, image, coordinate, sample); |
| 1962 | 1981 | ||
| 1963 | Expression AtomicImageExchange(Operation operation) { | 1982 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); |
| 1964 | UNIMPLEMENTED(); | 1983 | const Id semantics = v_uint_zero; |
| 1965 | return {}; | 1984 | const Id value = AsUint(Visit(meta.values[0])); |
| 1985 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | ||
| 1966 | } | 1986 | } |
| 1967 | 1987 | ||
| 1968 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> | 1988 | template <Id (Module::*func)(Id, Id, Id, Id, Id)> |
| @@ -1977,7 +1997,7 @@ private: | |||
| 1977 | return {v_float_zero, Type::Float}; | 1997 | return {v_float_zero, Type::Float}; |
| 1978 | } | 1998 | } |
| 1979 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | 1999 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); |
| 1980 | const Id semantics = Constant(t_uint, 0); | 2000 | const Id semantics = v_uint_zero; |
| 1981 | const Id value = AsUint(Visit(operation[1])); | 2001 | const Id value = AsUint(Visit(operation[1])); |
| 1982 | 2002 | ||
| 1983 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | 2003 | return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; |
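
Collapsing the five AtomicImage* stubs into one template works because the assembler's emitters for OpAtomicIAdd/And/Or/Xor/Exchange all share the signature Id(Id, Id, Id, Id, Id); the member-function pointer becomes a compile-time template argument, instantiated once per opcode in the operation table further down. The dispatch pattern in miniature, with a toy Module in place of the real SPIR-V assembler:

    #include <cstdio>

    // Toy stand-in; only the shared member-function signature matters here.
    struct Module {
        int OpAtomicIAdd(int a, int b) { return a + b; }
        int OpAtomicAnd(int a, int b) { return a & b; }
        int OpAtomicOr(int a, int b) { return a | b; }
    };

    template <int (Module::*func)(int, int)>
    int AtomicImage(Module& module, int a, int b) {
        return (module.*func)(a, b); // One body, opcode chosen at compile time
    }

    int main() {
        Module m;
        std::printf("%d %d %d\n", AtomicImage<&Module::OpAtomicIAdd>(m, 2, 3),
                    AtomicImage<&Module::OpAtomicAnd>(m, 6, 3),
                    AtomicImage<&Module::OpAtomicOr>(m, 4, 1));
    }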
| @@ -2175,14 +2195,37 @@ private: | |||
| 2175 | return {OpLoad(t_uint, thread_id), Type::Uint}; | 2195 | return {OpLoad(t_uint, thread_id), Type::Uint}; |
| 2176 | } | 2196 | } |
| 2177 | 2197 | ||
| 2198 | template <std::size_t index> | ||
| 2199 | Expression ThreadMask(Operation) { | ||
| 2200 | // TODO(Rodrigo): Handle devices with different warp sizes | ||
| 2201 | const Id mask = thread_masks[index]; | ||
| 2202 | return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint}; | ||
| 2203 | } | ||
| 2204 | |||
| 2178 | Expression ShuffleIndexed(Operation operation) { | 2205 | Expression ShuffleIndexed(Operation operation) { |
| 2179 | const Id value = AsFloat(Visit(operation[0])); | 2206 | const Id value = AsFloat(Visit(operation[0])); |
| 2180 | const Id index = AsUint(Visit(operation[1])); | 2207 | const Id index = AsUint(Visit(operation[1])); |
| 2181 | return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; | 2208 | return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; |
| 2182 | } | 2209 | } |
| 2183 | 2210 | ||
| 2184 | Expression MemoryBarrierGL(Operation) { | 2211 | Expression Barrier(Operation) { |
| 2185 | const auto scope = spv::Scope::Device; | 2212 | if (!ir.IsDecompiled()) { |
| 2213 | LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled"); | ||
| 2214 | return {}; | ||
| 2215 | } | ||
| 2216 | |||
| 2217 | const auto scope = spv::Scope::Workgroup; | ||
| 2218 | const auto memory = spv::Scope::Workgroup; | ||
| 2219 | const auto semantics = | ||
| 2220 | spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease; | ||
| 2221 | OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)), | ||
| 2222 | Constant(t_uint, static_cast<u32>(memory)), | ||
| 2223 | Constant(t_uint, static_cast<u32>(semantics))); | ||
| 2224 | return {}; | ||
| 2225 | } | ||
| 2226 | |||
| 2227 | template <spv::Scope scope> | ||
| 2228 | Expression MemoryBarrier(Operation) { | ||
| 2186 | const auto semantics = | 2229 | const auto semantics = |
| 2187 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | | 2230 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | |
| 2188 | spv::MemorySemanticsMask::WorkgroupMemory | | 2231 | spv::MemorySemanticsMask::WorkgroupMemory | |
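
The distinction drawn here: BAR.SYNC becomes OpControlBarrier, which is both an execution rendezvous and a workgroup-memory fence, while MEMBAR becomes OpMemoryBarrier, which only orders memory at the templated scope with no rendezvous. The ir.IsDecompiled() check exists presumably because a shader running through the emulated control-flow path cannot guarantee the uniform execution OpControlBarrier demands. As a loose CPU analogy (C++20), std::barrier plays the role of OpControlBarrier; std::atomic_thread_fence would be the OpMemoryBarrier analogue (ordering only, no rendezvous):

    #include <barrier>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
        constexpr int workgroup_size = 4;
        std::barrier sync_point(workgroup_size); // ~ OpControlBarrier at Workgroup scope
        std::vector<int> shared(workgroup_size);

        auto invocation = [&](int id) {
            shared[id] = id * id;         // Phase 1: each thread writes its own slot
            sync_point.arrive_and_wait(); // All phase-1 writes are visible past this point
            int sum = 0;                  // Phase 2: every thread may read every slot
            for (int v : shared) sum += v;
            if (id == 0) std::printf("sum = %d\n", sum); // 0 + 1 + 4 + 9 = 14
        };

        std::vector<std::thread> threads;
        for (int i = 0; i < workgroup_size; ++i) threads.emplace_back(invocation, i);
        for (auto& t : threads) t.join();
    }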
| @@ -2578,11 +2621,11 @@ private: | |||
| 2578 | 2621 | ||
| 2579 | &SPIRVDecompiler::ImageLoad, | 2622 | &SPIRVDecompiler::ImageLoad, |
| 2580 | &SPIRVDecompiler::ImageStore, | 2623 | &SPIRVDecompiler::ImageStore, |
| 2581 | &SPIRVDecompiler::AtomicImageAdd, | 2624 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicIAdd>, |
| 2582 | &SPIRVDecompiler::AtomicImageAnd, | 2625 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicAnd>, |
| 2583 | &SPIRVDecompiler::AtomicImageOr, | 2626 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicOr>, |
| 2584 | &SPIRVDecompiler::AtomicImageXor, | 2627 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicXor>, |
| 2585 | &SPIRVDecompiler::AtomicImageExchange, | 2628 | &SPIRVDecompiler::AtomicImage<&Module::OpAtomicExchange>, |
| 2586 | 2629 | ||
| 2587 | &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, | 2630 | &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, |
| 2588 | &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, | 2631 | &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, |
| @@ -2639,9 +2682,16 @@ private: | |||
| 2639 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, | 2682 | &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, |
| 2640 | 2683 | ||
| 2641 | &SPIRVDecompiler::ThreadId, | 2684 | &SPIRVDecompiler::ThreadId, |
| 2685 | &SPIRVDecompiler::ThreadMask<0>, // Eq | ||
| 2686 | &SPIRVDecompiler::ThreadMask<1>, // Ge | ||
| 2687 | &SPIRVDecompiler::ThreadMask<2>, // Gt | ||
| 2688 | &SPIRVDecompiler::ThreadMask<3>, // Le | ||
| 2689 | &SPIRVDecompiler::ThreadMask<4>, // Lt | ||
| 2642 | &SPIRVDecompiler::ShuffleIndexed, | 2690 | &SPIRVDecompiler::ShuffleIndexed, |
| 2643 | 2691 | ||
| 2644 | &SPIRVDecompiler::MemoryBarrierGL, | 2692 | &SPIRVDecompiler::Barrier, |
| 2693 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, | ||
| 2694 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, | ||
| 2645 | }; | 2695 | }; |
| 2646 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2696 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2647 | 2697 | ||
| @@ -2717,8 +2767,11 @@ private: | |||
| 2717 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | 2767 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); |
| 2718 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); | 2768 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); |
| 2719 | 2769 | ||
| 2770 | const Id t_image_uint = TypePointer(spv::StorageClass::Image, t_uint); | ||
| 2771 | |||
| 2720 | const Id v_float_zero = Constant(t_float, 0.0f); | 2772 | const Id v_float_zero = Constant(t_float, 0.0f); |
| 2721 | const Id v_float_one = Constant(t_float, 1.0f); | 2773 | const Id v_float_one = Constant(t_float, 1.0f); |
| 2774 | const Id v_uint_zero = Constant(t_uint, 0); | ||
| 2722 | 2775 | ||
| 2723 | // Nvidia uses these defaults for varyings (e.g. position and generic attributes) | 2776 | // Nvidia uses these defaults for varyings (e.g. position and generic attributes) |
| 2724 | const Id v_varying_default = | 2777 | const Id v_varying_default = |
| @@ -2743,15 +2796,16 @@ private: | |||
| 2743 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; | 2796 | std::unordered_map<u8, GenericVaryingDescription> output_attributes; |
| 2744 | std::map<u32, Id> constant_buffers; | 2797 | std::map<u32, Id> constant_buffers; |
| 2745 | std::map<GlobalMemoryBase, Id> global_buffers; | 2798 | std::map<GlobalMemoryBase, Id> global_buffers; |
| 2746 | std::map<u32, TexelBuffer> texel_buffers; | 2799 | std::map<u32, TexelBuffer> uniform_texels; |
| 2747 | std::map<u32, SampledImage> sampled_images; | 2800 | std::map<u32, SampledImage> sampled_images; |
| 2801 | std::map<u32, TexelBuffer> storage_texels; | ||
| 2748 | std::map<u32, StorageImage> images; | 2802 | std::map<u32, StorageImage> images; |
| 2749 | 2803 | ||
| 2804 | std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; | ||
| 2750 | Id instance_index{}; | 2805 | Id instance_index{}; |
| 2751 | Id vertex_index{}; | 2806 | Id vertex_index{}; |
| 2752 | Id base_instance{}; | 2807 | Id base_instance{}; |
| 2753 | Id base_vertex{}; | 2808 | Id base_vertex{}; |
| 2754 | std::array<Id, Maxwell::NumRenderTargets> frag_colors{}; | ||
| 2755 | Id frag_depth{}; | 2809 | Id frag_depth{}; |
| 2756 | Id frag_coord{}; | 2810 | Id frag_coord{}; |
| 2757 | Id front_facing{}; | 2811 | Id front_facing{}; |
| @@ -2763,6 +2817,7 @@ private: | |||
| 2763 | Id workgroup_id{}; | 2817 | Id workgroup_id{}; |
| 2764 | Id local_invocation_id{}; | 2818 | Id local_invocation_id{}; |
| 2765 | Id thread_id{}; | 2819 | Id thread_id{}; |
| 2820 | std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt | ||
| 2766 | 2821 | ||
| 2767 | VertexIndices in_indices; | 2822 | VertexIndices in_indices; |
| 2768 | VertexIndices out_indices; | 2823 | VertexIndices out_indices; |
| @@ -3006,13 +3061,17 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 3006 | } | 3061 | } |
| 3007 | for (const auto& sampler : ir.GetSamplers()) { | 3062 | for (const auto& sampler : ir.GetSamplers()) { |
| 3008 | if (sampler.is_buffer) { | 3063 | if (sampler.is_buffer) { |
| 3009 | entries.texel_buffers.emplace_back(sampler); | 3064 | entries.uniform_texels.emplace_back(sampler); |
| 3010 | } else { | 3065 | } else { |
| 3011 | entries.samplers.emplace_back(sampler); | 3066 | entries.samplers.emplace_back(sampler); |
| 3012 | } | 3067 | } |
| 3013 | } | 3068 | } |
| 3014 | for (const auto& image : ir.GetImages()) { | 3069 | for (const auto& image : ir.GetImages()) { |
| 3015 | entries.images.emplace_back(image); | 3070 | if (image.type == Tegra::Shader::ImageType::TextureBuffer) { |
| 3071 | entries.storage_texels.emplace_back(image); | ||
| 3072 | } else { | ||
| 3073 | entries.images.emplace_back(image); | ||
| 3074 | } | ||
| 3016 | } | 3075 | } |
| 3017 | for (const auto& attribute : ir.GetInputAttributes()) { | 3076 | for (const auto& attribute : ir.GetInputAttributes()) { |
| 3018 | if (IsGenericAttribute(attribute)) { | 3077 | if (IsGenericAttribute(attribute)) { |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f4c05ac3c..2b0e90396 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -21,8 +21,9 @@ class VKDevice; | |||
| 21 | namespace Vulkan { | 21 | namespace Vulkan { |
| 22 | 22 | ||
| 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 23 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 24 | using TexelBufferEntry = VideoCommon::Shader::Sampler; | 24 | using UniformTexelEntry = VideoCommon::Shader::Sampler; |
| 25 | using SamplerEntry = VideoCommon::Shader::Sampler; | 25 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 26 | using StorageTexelEntry = VideoCommon::Shader::Image; | ||
| 26 | using ImageEntry = VideoCommon::Shader::Image; | 27 | using ImageEntry = VideoCommon::Shader::Image; |
| 27 | 28 | ||
| 28 | constexpr u32 DESCRIPTOR_SET = 0; | 29 | constexpr u32 DESCRIPTOR_SET = 0; |
| @@ -66,13 +67,15 @@ private: | |||
| 66 | struct ShaderEntries { | 67 | struct ShaderEntries { |
| 67 | u32 NumBindings() const { | 68 | u32 NumBindings() const { |
| 68 | return static_cast<u32>(const_buffers.size() + global_buffers.size() + | 69 | return static_cast<u32>(const_buffers.size() + global_buffers.size() + |
| 69 | texel_buffers.size() + samplers.size() + images.size()); | 70 | uniform_texels.size() + samplers.size() + storage_texels.size() + |
| 71 | images.size()); | ||
| 70 | } | 72 | } |
| 71 | 73 | ||
| 72 | std::vector<ConstBufferEntry> const_buffers; | 74 | std::vector<ConstBufferEntry> const_buffers; |
| 73 | std::vector<GlobalBufferEntry> global_buffers; | 75 | std::vector<GlobalBufferEntry> global_buffers; |
| 74 | std::vector<TexelBufferEntry> texel_buffers; | 76 | std::vector<UniformTexelEntry> uniform_texels; |
| 75 | std::vector<SamplerEntry> samplers; | 77 | std::vector<SamplerEntry> samplers; |
| 78 | std::vector<StorageTexelEntry> storage_texels; | ||
| 76 | std::vector<ImageEntry> images; | 79 | std::vector<ImageEntry> images; |
| 77 | std::set<u32> attributes; | 80 | std::set<u32> attributes; |
| 78 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 81 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| @@ -88,7 +91,8 @@ struct Specialization final { | |||
| 88 | u32 shared_memory_size{}; | 91 | u32 shared_memory_size{}; |
| 89 | 92 | ||
| 90 | // Graphics specific | 93 | // Graphics specific |
| 91 | std::optional<float> point_size{}; | 94 | std::optional<float> point_size; |
| 95 | std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; | ||
| 92 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 96 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 93 | bool ndc_minus_one_to_one{}; | 97 | bool ndc_minus_one_to_one{}; |
| 94 | }; | 98 | }; |
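
NumBindings now counts all six entry categories because bindings are handed out sequentially, in the same order the decompiler's Declare* calls run (constant buffers, global buffers, uniform texels, samplers, storage texels, images), with each stage offset by the specialization's base_binding. A small sketch of the running-offset arithmetic, with made-up counts:

    #include <cstdio>

    struct Entries {
        std::size_t const_buffers, global_buffers, uniform_texels, samplers,
                    storage_texels, images;
    };

    // Each category starts where the previous one ended (declaration order above).
    void PrintBindingRanges(const Entries& e, std::size_t base = 0) {
        const char* names[] = {"const_buffers", "global_buffers", "uniform_texels",
                               "samplers", "storage_texels", "images"};
        const std::size_t sizes[] = {e.const_buffers, e.global_buffers, e.uniform_texels,
                                     e.samplers, e.storage_texels, e.images};
        std::size_t binding = base;
        for (int i = 0; i < 6; ++i) {
            std::printf("%s: bindings [%zu, %zu)\n", names[i], binding, binding + sizes[i]);
            binding += sizes[i];
        }
    }

    int main() { PrintBindingRanges({2, 1, 1, 3, 1, 2}); }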
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index dfddf7ad6..c765c60a0 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h | |||
| @@ -35,7 +35,7 @@ public: | |||
| 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. | 35 | /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. |
| 36 | void Unmap(u64 size); | 36 | void Unmap(u64 size); |
| 37 | 37 | ||
| 38 | VkBuffer GetHandle() const { | 38 | VkBuffer Handle() const { |
| 39 | return *buffer; | 39 | return *buffer; |
| 40 | } | 40 | } |
| 41 | 41 | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 55f43e61b..430031665 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -100,8 +100,8 @@ vk::Buffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, | |||
| 100 | ci.pNext = nullptr; | 100 | ci.pNext = nullptr; |
| 101 | ci.flags = 0; | 101 | ci.flags = 0; |
| 102 | ci.size = static_cast<VkDeviceSize>(host_memory_size); | 102 | ci.size = static_cast<VkDeviceSize>(host_memory_size); |
| 103 | ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | | 103 | ci.usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | |
| 104 | VK_BUFFER_USAGE_TRANSFER_DST_BIT; | 104 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; |
| 105 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; | 105 | ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; |
| 106 | ci.queueFamilyIndexCount = 0; | 106 | ci.queueFamilyIndexCount = 0; |
| 107 | ci.pQueueFamilyIndices = nullptr; | 107 | ci.pQueueFamilyIndices = nullptr; |
| @@ -167,6 +167,7 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP | |||
| 167 | ci.extent = {params.width, params.height, 1}; | 167 | ci.extent = {params.width, params.height, 1}; |
| 168 | break; | 168 | break; |
| 169 | case SurfaceTarget::Texture3D: | 169 | case SurfaceTarget::Texture3D: |
| 170 | ci.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT; | ||
| 170 | ci.extent = {params.width, params.height, params.depth}; | 171 | ci.extent = {params.width, params.height, params.depth}; |
| 171 | break; | 172 | break; |
| 172 | case SurfaceTarget::TextureBuffer: | 173 | case SurfaceTarget::TextureBuffer: |
| @@ -176,6 +177,12 @@ VkImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceP | |||
| 176 | return ci; | 177 | return ci; |
| 177 | } | 178 | } |
| 178 | 179 | ||
| 180 | u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::SwizzleSource y_source, | ||
| 181 | Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source) { | ||
| 182 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 183 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 184 | } | ||
| 185 | |||
| 179 | } // Anonymous namespace | 186 | } // Anonymous namespace |
| 180 | 187 | ||
| 181 | CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, | 188 | CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, |
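
EncodeSwizzle, moved here into the anonymous namespace from the texture cache header, packs the four swizzle selectors into a single u32 (x in the most significant byte) so a complete swizzle state can key the view cache and be compared in one integer test. A self-contained sketch with a matching decoder (the enumerator values below are illustrative stand-ins, not Tegra's real encoding):

    #include <cstdint>
    #include <cstdio>

    enum class SwizzleSource : std::uint32_t { Zero = 0, One = 1, R = 2, G = 3, B = 4, A = 5 };

    std::uint32_t EncodeSwizzle(SwizzleSource x, SwizzleSource y, SwizzleSource z,
                                SwizzleSource w) {
        return (static_cast<std::uint32_t>(x) << 24) | (static_cast<std::uint32_t>(y) << 16) |
               (static_cast<std::uint32_t>(z) << 8) | static_cast<std::uint32_t>(w);
    }

    SwizzleSource DecodeComponent(std::uint32_t key, int component /* 0 = x ... 3 = w */) {
        return static_cast<SwizzleSource>((key >> (24 - component * 8)) & 0xff);
    }

    int main() {
        const std::uint32_t key = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G,
                                                SwizzleSource::B, SwizzleSource::A);
        std::printf("key=%08x x=%u\n", key, static_cast<unsigned>(DecodeComponent(key, 0)));
    }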
| @@ -203,9 +210,11 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, | |||
| 203 | } | 210 | } |
| 204 | 211 | ||
| 205 | // TODO(Rodrigo): Move this to a virtual function. | 212 | // TODO(Rodrigo): Move this to a virtual function. |
| 206 | main_view = CreateViewInner( | 213 | u32 num_layers = 1; |
| 207 | ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels), | 214 | if (params.is_layered || params.target == SurfaceTarget::Texture3D) { |
| 208 | true); | 215 | num_layers = params.depth; |
| 216 | } | ||
| 217 | main_view = CreateView(ViewParams(params.target, 0, num_layers, 0, params.num_levels)); | ||
| 209 | } | 218 | } |
| 210 | 219 | ||
| 211 | CachedSurface::~CachedSurface() = default; | 220 | CachedSurface::~CachedSurface() = default; |
| @@ -253,12 +262,8 @@ void CachedSurface::DecorateSurfaceName() { | |||
| 253 | } | 262 | } |
| 254 | 263 | ||
| 255 | View CachedSurface::CreateView(const ViewParams& params) { | 264 | View CachedSurface::CreateView(const ViewParams& params) { |
| 256 | return CreateViewInner(params, false); | ||
| 257 | } | ||
| 258 | |||
| 259 | View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) { | ||
| 260 | // TODO(Rodrigo): Add name decorations | 265 | // TODO(Rodrigo): Add name decorations |
| 261 | return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy); | 266 | return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params); |
| 262 | } | 267 | } |
| 263 | 268 | ||
| 264 | void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { | 269 | void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { |
| @@ -342,38 +347,44 @@ VkImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { | |||
| 342 | } | 347 | } |
| 343 | 348 | ||
| 344 | CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, | 349 | CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, |
| 345 | const ViewParams& params, bool is_proxy) | 350 | const ViewParams& params) |
| 346 | : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, | 351 | : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, |
| 347 | image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, | 352 | image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, |
| 348 | aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, | 353 | aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, |
| 349 | base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, | 354 | base_level{params.base_level}, num_levels{params.num_levels}, |
| 350 | num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) | 355 | image_view_type{image ? GetImageViewType(params.target) : VK_IMAGE_VIEW_TYPE_1D} { |
| 351 | : VK_IMAGE_VIEW_TYPE_1D} {} | 356 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { |
| 357 | base_layer = 0; | ||
| 358 | num_layers = 1; | ||
| 359 | base_slice = params.base_layer; | ||
| 360 | num_slices = params.num_layers; | ||
| 361 | } else { | ||
| 362 | base_layer = params.base_layer; | ||
| 363 | num_layers = params.num_layers; | ||
| 364 | } | ||
| 365 | } | ||
| 352 | 366 | ||
| 353 | CachedSurfaceView::~CachedSurfaceView() = default; | 367 | CachedSurfaceView::~CachedSurfaceView() = default; |
| 354 | 368 | ||
| 355 | VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, | 369 | VkImageView CachedSurfaceView::GetImageView(SwizzleSource x_source, SwizzleSource y_source, |
| 356 | SwizzleSource z_source, SwizzleSource w_source) { | 370 | SwizzleSource z_source, SwizzleSource w_source) { |
| 357 | const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | 371 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); |
| 358 | if (last_image_view && last_swizzle == swizzle) { | 372 | if (last_image_view && last_swizzle == new_swizzle) { |
| 359 | return last_image_view; | 373 | return last_image_view; |
| 360 | } | 374 | } |
| 361 | last_swizzle = swizzle; | 375 | last_swizzle = new_swizzle; |
| 362 | 376 | ||
| 363 | const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle); | 377 | const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); |
| 364 | auto& image_view = entry->second; | 378 | auto& image_view = entry->second; |
| 365 | if (!is_cache_miss) { | 379 | if (!is_cache_miss) { |
| 366 | return last_image_view = *image_view; | 380 | return last_image_view = *image_view; |
| 367 | } | 381 | } |
| 368 | 382 | ||
| 369 | auto swizzle_x = MaxwellToVK::SwizzleSource(x_source); | 383 | std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), |
| 370 | auto swizzle_y = MaxwellToVK::SwizzleSource(y_source); | 384 | MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; |
| 371 | auto swizzle_z = MaxwellToVK::SwizzleSource(z_source); | ||
| 372 | auto swizzle_w = MaxwellToVK::SwizzleSource(w_source); | ||
| 373 | |||
| 374 | if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { | 385 | if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { |
| 375 | // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. | 386 | // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. |
| 376 | std::swap(swizzle_x, swizzle_z); | 387 | std::swap(swizzle[0], swizzle[2]); |
| 377 | } | 388 | } |
| 378 | 389 | ||
| 379 | // Games can sample depth or stencil values on textures. This is decided by the swizzle value on | 390 | // Games can sample depth or stencil values on textures. This is decided by the swizzle value on |
| @@ -395,11 +406,16 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y | |||
| 395 | UNIMPLEMENTED(); | 406 | UNIMPLEMENTED(); |
| 396 | } | 407 | } |
| 397 | 408 | ||
| 398 | // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity | 409 | // Make sure we sample the first component |
| 399 | swizzle_x = VK_COMPONENT_SWIZZLE_R; | 410 | std::transform( |
| 400 | swizzle_y = VK_COMPONENT_SWIZZLE_G; | 411 | swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { |
| 401 | swizzle_z = VK_COMPONENT_SWIZZLE_B; | 412 | return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component; |
| 402 | swizzle_w = VK_COMPONENT_SWIZZLE_A; | 413 | }); |
| 414 | } | ||
| 415 | |||
| 416 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | ||
| 417 | ASSERT(base_slice == 0); | ||
| 418 | ASSERT(num_slices == params.depth); | ||
| 403 | } | 419 | } |
| 404 | 420 | ||
| 405 | VkImageViewCreateInfo ci; | 421 | VkImageViewCreateInfo ci; |
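
Where the old code forced a full identity swizzle on depth/stencil views, the replacement is more conservative: it only folds the G selector back onto R so the view's single data channel is the one sampled, leaving any constant Zero/One selectors the game requested intact. The transform in isolation (ComponentSwizzle is an illustrative stand-in for VkComponentSwizzle):

    #include <algorithm>
    #include <array>
    #include <cstdio>

    enum ComponentSwizzle { IDENTITY, ZERO, ONE, R, G, B, A }; // illustrative subset

    int main() {
        std::array<ComponentSwizzle, 4> swizzle{G, G, G, ONE};
        // Fold G onto R so the single depth/stencil channel is sampled;
        // constant selectors like ZERO/ONE pass through untouched.
        std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(),
                       [](ComponentSwizzle c) { return c == G ? R : c; });
        for (ComponentSwizzle c : swizzle) std::printf("%d ", static_cast<int>(c));
        std::printf("\n"); // prints: 3 3 3 2 (R R R ONE)
    }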
| @@ -409,7 +425,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y | |||
| 409 | ci.image = surface.GetImageHandle(); | 425 | ci.image = surface.GetImageHandle(); |
| 410 | ci.viewType = image_view_type; | 426 | ci.viewType = image_view_type; |
| 411 | ci.format = surface.GetImage().GetFormat(); | 427 | ci.format = surface.GetImage().GetFormat(); |
| 412 | ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; | 428 | ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]}; |
| 413 | ci.subresourceRange.aspectMask = aspect; | 429 | ci.subresourceRange.aspectMask = aspect; |
| 414 | ci.subresourceRange.baseMipLevel = base_level; | 430 | ci.subresourceRange.baseMipLevel = base_level; |
| 415 | ci.subresourceRange.levelCount = num_levels; | 431 | ci.subresourceRange.levelCount = num_levels; |
| @@ -420,6 +436,35 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y | |||
| 420 | return last_image_view = *image_view; | 436 | return last_image_view = *image_view; |
| 421 | } | 437 | } |
| 422 | 438 | ||
| 439 | VkImageView CachedSurfaceView::GetAttachment() { | ||
| 440 | if (render_target) { | ||
| 441 | return *render_target; | ||
| 442 | } | ||
| 443 | |||
| 444 | VkImageViewCreateInfo ci; | ||
| 445 | ci.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; | ||
| 446 | ci.pNext = nullptr; | ||
| 447 | ci.flags = 0; | ||
| 448 | ci.image = surface.GetImageHandle(); | ||
| 449 | ci.format = surface.GetImage().GetFormat(); | ||
| 450 | ci.components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, | ||
| 451 | VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY}; | ||
| 452 | ci.subresourceRange.aspectMask = aspect_mask; | ||
| 453 | ci.subresourceRange.baseMipLevel = base_level; | ||
| 454 | ci.subresourceRange.levelCount = num_levels; | ||
| 455 | if (image_view_type == VK_IMAGE_VIEW_TYPE_3D) { | ||
| 456 | ci.viewType = num_slices > 1 ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D; | ||
| 457 | ci.subresourceRange.baseArrayLayer = base_slice; | ||
| 458 | ci.subresourceRange.layerCount = num_slices; | ||
| 459 | } else { | ||
| 460 | ci.viewType = image_view_type; | ||
| 461 | ci.subresourceRange.baseArrayLayer = base_layer; | ||
| 462 | ci.subresourceRange.layerCount = num_layers; | ||
| 463 | } | ||
| 464 | render_target = device.GetLogical().CreateImageView(ci); | ||
| 465 | return *render_target; | ||
| 466 | } | ||
| 467 | |||
| 423 | VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 468 | VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 424 | const VKDevice& device, VKResourceManager& resource_manager, | 469 | const VKDevice& device, VKResourceManager& resource_manager, |
| 425 | VKMemoryManager& memory_manager, VKScheduler& scheduler, | 470 | VKMemoryManager& memory_manager, VKScheduler& scheduler, |
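
The new slice bookkeeping in CachedSurfaceView exists because Vulkan 3D images have depth slices, not array layers: a sampled view stays VK_IMAGE_VIEW_TYPE_3D, but GetAttachment may rebind a slice range as a 2D or 2D-array view, which is only legal because GenerateImageCreateInfo now sets VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT on 3D images. The selection logic in miniature, with stand-in types:

    #include <cstdio>

    enum class ViewType { e2D, e2DArray, e3D };
    struct AttachmentView { ViewType type; unsigned base; unsigned count; };

    // For a 3D image used as a render target, depth slices take the role that
    // array layers play everywhere else (mirrors the branch in GetAttachment).
    AttachmentView PickAttachment(ViewType image_view_type, unsigned base_layer,
                                  unsigned num_layers, unsigned base_slice,
                                  unsigned num_slices) {
        if (image_view_type == ViewType::e3D) {
            return {num_slices > 1 ? ViewType::e2DArray : ViewType::e2D, base_slice, num_slices};
        }
        return {image_view_type, base_layer, num_layers};
    }

    int main() {
        const AttachmentView view = PickAttachment(ViewType::e3D, 0, 1, 2, 4);
        std::printf("type=%d base=%u count=%u\n", static_cast<int>(view.type), view.base,
                    view.count); // type=1 (2D array), base=2, count=4
    }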
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f211ccb1e..807e26c8a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -91,7 +91,6 @@ protected: | |||
| 91 | void DecorateSurfaceName(); | 91 | void DecorateSurfaceName(); |
| 92 | 92 | ||
| 93 | View CreateView(const ViewParams& params) override; | 93 | View CreateView(const ViewParams& params) override; |
| 94 | View CreateViewInner(const ViewParams& params, bool is_proxy); | ||
| 95 | 94 | ||
| 96 | private: | 95 | private: |
| 97 | void UploadBuffer(const std::vector<u8>& staging_buffer); | 96 | void UploadBuffer(const std::vector<u8>& staging_buffer); |
| @@ -120,23 +119,20 @@ private: | |||
| 120 | class CachedSurfaceView final : public VideoCommon::ViewBase { | 119 | class CachedSurfaceView final : public VideoCommon::ViewBase { |
| 121 | public: | 120 | public: |
| 122 | explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, | 121 | explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, |
| 123 | const ViewParams& params, bool is_proxy); | 122 | const ViewParams& params); |
| 124 | ~CachedSurfaceView(); | 123 | ~CachedSurfaceView(); |
| 125 | 124 | ||
| 126 | VkImageView GetHandle(Tegra::Texture::SwizzleSource x_source, | 125 | VkImageView GetImageView(Tegra::Texture::SwizzleSource x_source, |
| 127 | Tegra::Texture::SwizzleSource y_source, | 126 | Tegra::Texture::SwizzleSource y_source, |
| 128 | Tegra::Texture::SwizzleSource z_source, | 127 | Tegra::Texture::SwizzleSource z_source, |
| 129 | Tegra::Texture::SwizzleSource w_source); | 128 | Tegra::Texture::SwizzleSource w_source); |
| 129 | |||
| 130 | VkImageView GetAttachment(); | ||
| 130 | 131 | ||
| 131 | bool IsSameSurface(const CachedSurfaceView& rhs) const { | 132 | bool IsSameSurface(const CachedSurfaceView& rhs) const { |
| 132 | return &surface == &rhs.surface; | 133 | return &surface == &rhs.surface; |
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | VkImageView GetHandle() { | ||
| 136 | return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, | ||
| 137 | Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); | ||
| 138 | } | ||
| 139 | |||
| 140 | u32 GetWidth() const { | 136 | u32 GetWidth() const { |
| 141 | return params.GetMipWidth(base_level); | 137 | return params.GetMipWidth(base_level); |
| 142 | } | 138 | } |
| @@ -180,14 +176,6 @@ public: | |||
| 180 | } | 176 | } |
| 181 | 177 | ||
| 182 | private: | 178 | private: |
| 183 | static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, | ||
| 184 | Tegra::Texture::SwizzleSource y_source, | ||
| 185 | Tegra::Texture::SwizzleSource z_source, | ||
| 186 | Tegra::Texture::SwizzleSource w_source) { | ||
| 187 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 188 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 189 | } | ||
| 190 | |||
| 191 | // Store a copy of these values to avoid double dereference when reading them | 179 | // Store a copy of these values to avoid double dereference when reading them |
| 192 | const SurfaceParams params; | 180 | const SurfaceParams params; |
| 193 | const VkImage image; | 181 | const VkImage image; |
| @@ -196,15 +184,18 @@ private: | |||
| 196 | 184 | ||
| 197 | const VKDevice& device; | 185 | const VKDevice& device; |
| 198 | CachedSurface& surface; | 186 | CachedSurface& surface; |
| 199 | const u32 base_layer; | ||
| 200 | const u32 num_layers; | ||
| 201 | const u32 base_level; | 187 | const u32 base_level; |
| 202 | const u32 num_levels; | 188 | const u32 num_levels; |
| 203 | const VkImageViewType image_view_type; | 189 | const VkImageViewType image_view_type; |
| 190 | u32 base_layer = 0; | ||
| 191 | u32 num_layers = 0; | ||
| 192 | u32 base_slice = 0; | ||
| 193 | u32 num_slices = 0; | ||
| 204 | 194 | ||
| 205 | VkImageView last_image_view = nullptr; | 195 | VkImageView last_image_view = nullptr; |
| 206 | u32 last_swizzle = 0; | 196 | u32 last_swizzle = 0; |
| 207 | 197 | ||
| 198 | vk::ImageView render_target; | ||
| 208 | std::unordered_map<u32, vk::ImageView> view_cache; | 199 | std::unordered_map<u32, vk::ImageView> view_cache; |
| 209 | }; | 200 | }; |
| 210 | 201 | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 9392f065b..63adbc4a3 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
@@ -387,7 +387,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     }
     case OpCode::Id::RED: {
         UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
-        UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
         if (!real_address || !base_address) {
@@ -396,7 +395,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }
         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
         Node value = GetRegister(instr.gpr0);
-        bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
+        bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
         break;
     }
     case OpCode::Id::ATOM: {
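
The RED change above stops rejecting every non-Add reduction and instead forwards the decoded operation through GetAtomOperation (defined earlier in this file; its body is not part of this hunk). The semantic difference between RED and ATOM is only whether the old value is produced; a minimal host-side analogy using std::atomic:

    #include <atomic>
    #include <cstdint>

    // RED ("reduction") is a global atomic whose previous value is discarded, while
    // ATOM returns it. With std::atomic the distinction is just whether the result
    // of the fetch_* call is used.
    void ReduceAdd(std::atomic<std::uint32_t>& gmem, std::uint32_t value) {
        gmem.fetch_add(value, std::memory_order_relaxed); // RED.ADD: no destination register
    }

    std::uint32_t AtomAdd(std::atomic<std::uint32_t>& gmem, std::uint32_t value) {
        return gmem.fetch_add(value, std::memory_order_relaxed); // ATOM.ADD: old value returned
    }
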
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d4f95b18c..c0a8f233f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         return Operation(OperationCode::YNegate);
     case SystemVariable::InvocationInfo:
         LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
-        return Immediate(0U);
+        return Immediate(0x00ff'0000U);
     case SystemVariable::WscaleFactorXY:
         UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
         return Immediate(0U);
@@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         return Operation(OperationCode::WorkGroupIdY);
     case SystemVariable::CtaIdZ:
         return Operation(OperationCode::WorkGroupIdZ);
+    case SystemVariable::EqMask:
+    case SystemVariable::LtMask:
+    case SystemVariable::LeMask:
+    case SystemVariable::GtMask:
+    case SystemVariable::GeMask:
+        uses_warps = true;
+        switch (instr.sys20) {
+        case SystemVariable::EqMask:
+            return Operation(OperationCode::ThreadEqMask);
+        case SystemVariable::LtMask:
+            return Operation(OperationCode::ThreadLtMask);
+        case SystemVariable::LeMask:
+            return Operation(OperationCode::ThreadLeMask);
+        case SystemVariable::GtMask:
+            return Operation(OperationCode::ThreadGtMask);
+        case SystemVariable::GeMask:
+            return Operation(OperationCode::ThreadGeMask);
+        default:
+            UNREACHABLE();
+            return Immediate(0u);
+        }
     default:
         UNIMPLEMENTED_MSG("Unhandled system move: {}",
                           static_cast<u32>(instr.sys20.Value()));
@@ -272,10 +293,25 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
         break;
     }
+    case OpCode::Id::BAR: {
+        UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
+        bb.push_back(Operation(OperationCode::Barrier));
+        break;
+    }
     case OpCode::Id::MEMBAR: {
-        UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
         UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
-        bb.push_back(Operation(OperationCode::MemoryBarrierGL));
+        const OperationCode type = [instr] {
+            switch (instr.membar.type) {
+            case Tegra::Shader::MembarType::CTA:
+                return OperationCode::MemoryBarrierGroup;
+            case Tegra::Shader::MembarType::GL:
+                return OperationCode::MemoryBarrierGlobal;
+            default:
+                UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
+                return OperationCode::MemoryBarrierGlobal;
+            }
+        }();
+        bb.push_back(Operation(type));
         break;
     }
     case OpCode::Id::DEPBAR: {
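
The new Thread*Mask system variables map to the per-lane warp masks (the gl_SubGroupEqMask family in GL terms). Assuming a 32-wide warp, the expected values reduce to bit arithmetic on the lane index; a sketch for reference:

    #include <cstdint>

    // Lane masks for a 32-wide warp, derived from the calling thread's lane id.
    // EqMask has only the own bit set; Lt/Le/Gt/Ge cover the lanes below/above it.
    struct LaneMasks {
        std::uint32_t eq, lt, le, gt, ge;
    };

    constexpr LaneMasks ComputeLaneMasks(std::uint32_t lane) {
        const std::uint32_t eq = 1u << lane;
        const std::uint32_t lt = eq - 1u;
        return {eq, lt, lt | eq, ~(lt | eq), ~lt};
    }

    static_assert(ComputeLaneMasks(3).eq == 0x0000'0008);
    static_assert(ComputeLaneMasks(3).lt == 0x0000'0007);
    static_assert(ComputeLaneMasks(3).ge == 0xffff'fff8);
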
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8f0bb996e..29ebf65ba 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
-                                               std::optional<u32> buffer) {
+ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
+    SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
     if (info.IsComplete()) {
         return info;
     }
-    const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
-                                : registry.ObtainBoundSampler(offset);
     if (!sampler) {
         LOG_WARNING(HW_GPU, "Unknown sampler info");
         info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
 
 std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
                                             SamplerInfo sampler_info) {
-    const auto offset = static_cast<u32>(sampler.index.Value());
-    const auto info = GetSamplerInfo(sampler_info, offset);
+    const u32 offset = static_cast<u32>(sampler.index.Value());
+    const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
 
     // If this sampler has already been used, return the existing mapping.
     const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
     const Node sampler_register = GetRegister(reg);
     const auto [base_node, tracked_sampler_info] =
         TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT(base_node != nullptr);
-    if (base_node == nullptr) {
+    if (!base_node) {
+        UNREACHABLE();
         return std::nullopt;
     }
 
-    if (const auto bindless_sampler_info =
-            std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
-        const u32 buffer = bindless_sampler_info->GetIndex();
-        const u32 offset = bindless_sampler_info->GetOffset();
-        info = GetSamplerInfo(info, offset, buffer);
+    if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+        const u32 buffer = sampler_info->index;
+        const u32 offset = sampler_info->offset;
+        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
 
         // If this sampler has already been used, return the existing mapping.
         const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
-                                     [buffer = buffer, offset = offset](const Sampler& entry) {
+                                     [buffer, offset](const Sampler& entry) {
                                          return entry.buffer == buffer && entry.offset == offset;
                                      });
         if (it != used_samplers.end()) {
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
         return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
                                           *info.is_shadow, *info.is_buffer, false);
     }
-    if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
-        const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
-        index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
-        info = GetSamplerInfo(info, base_offset);
+    if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
+        const std::pair indices = sampler_info->indices;
+        const std::pair offsets = sampler_info->offsets;
+        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
+
+        // Try to use an already created sampler if it exists
+        const auto it = std::find_if(
+            used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
+                return offsets == std::pair{entry.offset, entry.secondary_offset} &&
+                       indices == std::pair{entry.buffer, entry.secondary_buffer};
+            });
+        if (it != used_samplers.end()) {
+            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
+                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
+            return *it;
+        }
+
+        // Otherwise create a new mapping for this sampler
+        const u32 next_index = static_cast<u32>(used_samplers.size());
+        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
+                                          *info.is_shadow, *info.is_buffer);
+    }
+    if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+        const u32 base_offset = sampler_info->base_offset / 4;
+        index_var = GetCustomVariable(sampler_info->bindless_var);
+        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
 
         // If this sampler has already been used, return the existing mapping.
         const auto it = std::find_if(
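
The reuse check in the separate-sampler branch compares both offsets and both buffers at once through std::pair's operator==. A reduced sketch of that lookup, with a trimmed-down Sampler carrying only the compared fields:

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Sampler {
        std::uint32_t offset = 0;
        std::uint32_t secondary_offset = 0;
        std::uint32_t buffer = 0;
        std::uint32_t secondary_buffer = 0;
    };

    // Find a previously registered separate sampler with the same cbuf indices and offsets.
    const Sampler* FindSeparateSampler(const std::vector<Sampler>& used,
                                       std::pair<std::uint32_t, std::uint32_t> indices,
                                       std::pair<std::uint32_t, std::uint32_t> offsets) {
        const auto it = std::find_if(used.begin(), used.end(), [&](const Sampler& entry) {
            return offsets == std::pair{entry.offset, entry.secondary_offset} &&
                   indices == std::pair{entry.buffer, entry.secondary_buffer};
        });
        return it != used.end() ? &*it : nullptr;
    }
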
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index f75b62240..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -226,9 +226,16 @@ enum class OperationCode {
     VoteEqual, /// (bool) -> bool
 
     ThreadId,       /// () -> uint
+    ThreadEqMask,   /// () -> uint
+    ThreadGeMask,   /// () -> uint
+    ThreadGtMask,   /// () -> uint
+    ThreadLeMask,   /// () -> uint
+    ThreadLtMask,   /// () -> uint
     ShuffleIndexed, /// (uint value, uint index) -> uint
 
-    MemoryBarrierGL, /// () -> void
+    Barrier,             /// () -> void
+    MemoryBarrierGroup,  /// () -> void
+    MemoryBarrierGlobal, /// () -> void
 
     Amount,
 };
@@ -268,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
 
-class BindlessSamplerNode;
-class ArraySamplerNode;
+struct ArraySamplerNode;
+struct BindlessSamplerNode;
+struct SeparateSamplerNode;
 
-using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
 using TrackSampler = std::shared_ptr<TrackSamplerData>;
 
 struct Sampler {
@@ -281,63 +289,51 @@ struct Sampler {
         : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
           is_buffer{is_buffer}, is_indexed{is_indexed} {}
 
+    /// Separate sampler constructor
+    constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
+                               Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
+                               bool is_buffer)
+        : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
+          buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
+          is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
+
     /// Bindless samplers constructor
     constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
                                bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
           is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
 
     u32 index = 0;  ///< Emulated index given for the this sampler.
     u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
-    u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
-    u32 size = 1;   ///< Size of the sampler.
+    u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
+    u32 buffer = 0;           ///< Buffer where the bindless sampler is read.
+    u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
+    u32 size = 1;             ///< Size of the sampler.
 
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array = false;    ///< Whether the texture is being sampled as an array texture or not.
     bool is_shadow = false;   ///< Whether the texture is being sampled as a depth texture or not.
     bool is_buffer = false;   ///< Whether the texture is a texture buffer without sampler.
     bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
     bool is_indexed = false;  ///< Whether this sampler is an indexed array of textures.
+    bool is_separated = false; ///< Whether the image and sampler is separated or not.
 };
 
 /// Represents a tracked bindless sampler into a direct const buffer
-class ArraySamplerNode final {
-public:
-    explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
-        : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
-
-    constexpr u32 GetIndex() const {
-        return index;
-    }
-
-    constexpr u32 GetBaseOffset() const {
-        return base_offset;
-    }
-
-    constexpr u32 GetIndexVar() const {
-        return bindless_var;
-    }
-
-private:
+struct ArraySamplerNode {
     u32 index;
     u32 base_offset;
     u32 bindless_var;
 };
 
-/// Represents a tracked bindless sampler into a direct const buffer
-class BindlessSamplerNode final {
-public:
-    explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
-
-    constexpr u32 GetIndex() const {
-        return index;
-    }
-
-    constexpr u32 GetOffset() const {
-        return offset;
-    }
+/// Represents a tracked separate sampler image pair that was folded statically
+struct SeparateSamplerNode {
+    std::pair<u32, u32> indices;
+    std::pair<u32, u32> offsets;
+};
 
-private:
+/// Represents a tracked bindless sampler into a direct const buffer
+struct BindlessSamplerNode {
     u32 index;
     u32 offset;
 };
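
With the getter classes collapsed into plain aggregates, consumers discriminate the TrackSamplerData variant directly with std::get_if and public data members, as the texture.cpp hunks earlier do. A minimal self-contained sketch of that pattern:

    #include <cstdint>
    #include <cstdio>
    #include <memory>
    #include <utility>
    #include <variant>

    struct ArraySamplerNode { std::uint32_t index, base_offset, bindless_var; };
    struct SeparateSamplerNode { std::pair<std::uint32_t, std::uint32_t> indices, offsets; };
    struct BindlessSamplerNode { std::uint32_t index, offset; };

    using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
    using TrackSampler = std::shared_ptr<TrackSamplerData>;

    void Describe(const TrackSampler& sampler) {
        if (const auto* bindless = std::get_if<BindlessSamplerNode>(sampler.get())) {
            std::printf("bindless: cbuf%u offset 0x%x\n", static_cast<unsigned>(bindless->index),
                        static_cast<unsigned>(bindless->offset));
        } else if (const auto* separate = std::get_if<SeparateSamplerNode>(sampler.get())) {
            std::printf("separate: cbuf%u | cbuf%u\n", static_cast<unsigned>(separate->indices.first),
                        static_cast<unsigned>(separate->indices.second));
        } else if (const auto* array = std::get_if<ArraySamplerNode>(sampler.get())) {
            std::printf("array: cbuf%u base 0x%x\n", static_cast<unsigned>(array->index),
                        static_cast<unsigned>(array->base_offset));
        }
    }
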
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
 template <typename T, typename... Args>
 TrackSampler MakeTrackSampler(Args&&... args) {
     static_assert(std::is_convertible_v<T, TrackSamplerData>);
-    return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+    return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
 }
 
 template <typename... Args>
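
The switch from T(...) to T{...} is what keeps MakeTrackSampler compiling after the node.h change above: the sampler nodes no longer declare constructors, so they must be aggregate-initialized. A reduced illustration:

    #include <memory>
    #include <utility>
    #include <variant>

    struct Aggregate {
        int index;
        int offset;
    };

    using Data = std::variant<Aggregate>;

    template <typename T, typename... Args>
    std::shared_ptr<Data> Make(Args&&... args) {
        // T(args...) would require a user-declared constructor; T{args...} performs
        // aggregate initialization and works for plain structs like Aggregate.
        return std::make_shared<Data>(T{std::forward<Args>(args)...});
    }

    const auto node = Make<Aggregate>(1, 2);
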
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..cdf274e54 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
     return value;
 }
 
+std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
+    std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
+    SeparateSamplerKey key;
+    key.buffers = buffers;
+    key.offsets = offsets;
+    const auto iter = separate_samplers.find(key);
+    if (iter != separate_samplers.end()) {
+        return iter->second;
+    }
+    if (!engine) {
+        return std::nullopt;
+    }
+
+    const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
+    const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
+    const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
+    separate_samplers.emplace(key, value);
+    return value;
+}
+
 std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
                                                                                  u32 offset) {
     const std::pair key = {buffer, offset};
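
The interesting line above is AccessSampler(handle_1 | handle_2): a separate texture/sampler pair stores the two halves of the combined handle in two const-buffer words, and ORing them reproduces the single word a non-separate shader would have read. A trivial sketch; the assumption that the two halves occupy non-overlapping bit fields is mine, only the OR itself comes from this diff:

    #include <cstdint>

    // Recombine a split texture/sampler handle. In the separate-sampler case one
    // cbuf word carries the texture bits and the other the sampler bits, with
    // disjoint fields, so a bitwise OR restores the full handle.
    constexpr std::uint32_t CombineHandles(std::uint32_t texture_word,
                                           std::uint32_t sampler_word) {
        return texture_word | sampler_word;
    }

    static_assert(CombineHandles(0x0000'0042, 0x0015'0000) == 0x0015'0042);
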
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..231206765 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
 
 namespace VideoCommon::Shader {
 
+struct SeparateSamplerKey {
+    std::pair<u32, u32> buffers;
+    std::pair<u32, u32> offsets;
+};
+
+} // namespace VideoCommon::Shader
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::Shader::SeparateSamplerKey> {
+    std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
+        return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
+                                key.offsets.second);
+    }
+};
+
+template <>
+struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
+    bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
+                    const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
+        return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
+    }
+};
+
+} // namespace std
+
+namespace VideoCommon::Shader {
+
 using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
 using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using SeparateSamplerMap =
+    std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
 using BindlessSamplerMap =
     std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
 
@@ -73,6 +104,9 @@ public:
 
     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
 
+    std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
+        std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
+
     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
 
     /// Inserts a key.
@@ -128,6 +162,7 @@ private:
     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
     KeyMap keys;
     BoundSamplerMap bound_samplers;
+    SeparateSamplerMap separate_samplers;
     BindlessSamplerMap bindless_samplers;
     u32 bound_buffer;
     GraphicsInfo graphics_info;
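
One property of the std::hash specialization above worth noting: XOR-folding the four key words is cheap but symmetric, so keys with swapped buffers or swapped offsets land in the same bucket and are told apart only by the equal_to specialization. That is harmless for correctness, just relevant to bucket distribution; a self-contained illustration:

    #include <cstdint>
    #include <functional>
    #include <utility>

    struct SeparateSamplerKey {
        std::pair<std::uint32_t, std::uint32_t> buffers;
        std::pair<std::uint32_t, std::uint32_t> offsets;
    };

    // Mirror of the specialization above: XOR the four words, hash the result.
    std::size_t HashKey(const SeparateSamplerKey& key) noexcept {
        return std::hash<std::uint32_t>{}(key.buffers.first ^ key.buffers.second ^
                                          key.offsets.first ^ key.offsets.second);
    }

    // {{1, 2}, {8, 16}} and {{2, 1}, {16, 8}} are different keys with identical
    // hashes; unordered_map resolves the collision through equal_to within the bucket.
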
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 15ae152f2..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -330,8 +330,8 @@ private:
     OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
 
     /// Queries the missing sampler info from the execution context.
-    SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset,
-                               std::optional<u32> buffer = std::nullopt);
+    SamplerInfo GetSamplerInfo(SamplerInfo info,
+                               std::optional<Tegra::Engines::SamplerDescriptor> sampler);
 
     /// Accesses a texture sampler.
     std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@@ -409,8 +409,14 @@ private:
 
     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
-    std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
-                                                        s64 cursor);
+    std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                       s64 cursor);
+
+    std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
+                                                             const OperationNode& operation,
+                                                             Node gpr, Node base_offset,
+                                                             Node tracked, const NodeBlock& code,
+                                                             s64 cursor);
 
     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
 
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index eb97bfd41..d5ed81442 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
 namespace VideoCommon::Shader {
 
 namespace {
+
 std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                    OperationCode operation_code) {
     for (; cursor >= 0; --cursor) {
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
     if (const auto operation = std::get_if<OperationNode>(&*node)) {
         operation->SetAmendIndex(amend_index);
         return true;
-    } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+    }
+    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
         conditional->SetAmendIndex(amend_index);
         return true;
     }
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
 
 } // Anonymous namespace
 
-std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
                                                              s64 cursor) {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+        const u32 cbuf_index = cbuf->GetIndex();
+
         // Constant buffer found, test if it's an immediate
         const auto& offset = cbuf->GetOffset();
         if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
-            auto track =
-                MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
+            auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
             return {tracked, track};
         }
         if (const auto operation = std::get_if<OperationNode>(&*offset)) {
             const u32 bound_buffer = registry.GetBoundBuffer();
-            if (bound_buffer != cbuf->GetIndex()) {
+            if (bound_buffer != cbuf_index) {
                 return {};
             }
-            const auto pair = DecoupleIndirectRead(*operation);
-            if (!pair) {
-                return {};
+            if (const std::optional pair = DecoupleIndirectRead(*operation)) {
+                auto [gpr, base_offset] = *pair;
+                return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
+                                                  code, cursor);
             }
-            auto [gpr, base_offset] = *pair;
-            const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
-            const auto& gpu_driver = registry.AccessGuestDriverProfile();
-            const u32 bindless_cv = NewCustomVariable();
-            Node op =
-                Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
-
-            const Node cv_node = GetCustomVariable(bindless_cv);
-            Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
-            const std::size_t amend_index = DeclareAmend(std::move(amend_op));
-            AmendNodeCv(amend_index, code[cursor]);
-            // TODO Implement Bindless Index custom variable
-            auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
-                                                            offset_inm->GetValue(), bindless_cv);
-            return {tracked, track};
         }
         return {};
     }
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
         return TrackBindlessSampler(source, code, new_cursor);
     }
     if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
-        for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
-            if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
-                std::get<0>(found)) {
-                // Cbuf found in operand.
+        const OperationNode& op = *operation;
+
+        const OperationCode opcode = operation->GetCode();
+        if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
+            ASSERT(op.GetOperandsCount() == 2);
+            auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
+            auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
+            if (node_a && node_b) {
+                auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
+                                                                   std::pair{offset_a, offset_b});
+                return {tracked, std::move(track)};
+            }
+        }
+        std::size_t i = op.GetOperandsCount();
+        while (i--) {
+            if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) {
+                // Constant buffer found in operand.
                 return found;
             }
         }
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
         return {};
     }
 
+std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
+    const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
+    const NodeBlock& code, s64 cursor) {
+    const auto offset_imm = std::get<ImmediateNode>(*base_offset);
+    const auto& gpu_driver = registry.AccessGuestDriverProfile();
+    const u32 bindless_cv = NewCustomVariable();
+    const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
+    Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
+
+    Node cv_node = GetCustomVariable(bindless_cv);
+    Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
+    const std::size_t amend_index = DeclareAmend(std::move(amend_op));
+    AmendNodeCv(amend_index, code[cursor]);
+
+    // TODO: Implement bindless index custom variable
+    auto track =
+        MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
+    return {tracked, track};
+}
+
 std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
                                                s64 cursor) const {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
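
HandleBindlessIndirectRead boils down to one division: the tracked register holds a byte offset into the const buffer, and dividing by the driver's texture handle stride yields the texture index stored in the amended custom variable. In plain arithmetic (the 4-byte default stride is an assumption; the real value comes from the guest driver profile):

    #include <cstdint>

    // The register resolved by DecoupleIndirectRead carries a byte offset; dividing
    // by the handle stride converts it into a texture array index. This mirrors the
    // OperationCode::UDiv node emitted above.
    constexpr std::uint32_t TextureIndexFromByteOffset(std::uint32_t gpr_byte_offset,
                                                       std::uint32_t handler_size = 4) {
        return gpr_byte_offset / handler_size;
    }

    static_assert(TextureIndexFromByteOffset(0) == 0);
    static_assert(TextureIndexFromByteOffset(12) == 3);
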
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 000000000..a23c23886
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,228 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+template <class T>
+class ShaderCache {
+    static constexpr u64 PAGE_SHIFT = 14;
+
+    struct Entry {
+        VAddr addr_start;
+        VAddr addr_end;
+        T* data;
+
+        bool is_memory_marked = true;
+
+        constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
+            return start < addr_end && addr_start < end;
+        }
+    };
+
+public:
+    virtual ~ShaderCache() = default;
+
+    /// @brief Removes shaders inside a given region
+    /// @note Checks for ranges
+    /// @param addr Start address of the invalidation
+    /// @param size Number of bytes of the invalidation
+    void InvalidateRegion(VAddr addr, std::size_t size) {
+        std::scoped_lock lock{invalidation_mutex};
+        InvalidatePagesInRegion(addr, size);
+        RemovePendingShaders();
+    }
+
+    /// @brief Unmarks a memory region as cached and marks it for removal
+    /// @param addr Start address of the CPU write operation
+    /// @param size Number of bytes of the CPU write operation
+    void OnCPUWrite(VAddr addr, std::size_t size) {
+        std::lock_guard lock{invalidation_mutex};
+        InvalidatePagesInRegion(addr, size);
+    }
+
+    /// @brief Flushes delayed removal operations
+    void SyncGuestHost() {
+        std::scoped_lock lock{invalidation_mutex};
+        RemovePendingShaders();
+    }
+
+    /// @brief Tries to obtain a cached shader starting in a given address
+    /// @note Doesn't check for ranges, the given address has to be the start of the shader
+    /// @param addr Start address of the shader, this doesn't cache for region
+    /// @return Pointer to a valid shader, nullptr when nothing is found
+    T* TryGet(VAddr addr) const {
+        std::scoped_lock lock{lookup_mutex};
+
+        const auto it = lookup_cache.find(addr);
+        if (it == lookup_cache.end()) {
+            return nullptr;
+        }
+        return it->second->data;
+    }
+
+protected:
+    explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
+
+    /// @brief Register in the cache a given entry
+    /// @param data Shader to store in the cache
+    /// @param addr Start address of the shader that will be registered
+    /// @param size Size in bytes of the shader
+    void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
+        std::scoped_lock lock{invalidation_mutex, lookup_mutex};
+
+        const VAddr addr_end = addr + size;
+        Entry* const entry = NewEntry(addr, addr_end, data.get());
+
+        const u64 page_end = addr_end >> PAGE_SHIFT;
+        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+            invalidation_cache[page].push_back(entry);
+        }
+
+        storage.push_back(std::move(data));
+
+        rasterizer.UpdatePagesCachedCount(addr, size, 1);
+    }
+
+    /// @brief Called when a shader is going to be removed
+    /// @param shader Shader that will be removed
+    /// @pre invalidation_cache is locked
+    /// @pre lookup_mutex is locked
+    virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
+
+private:
+    /// @brief Invalidate pages in a given region
+    /// @pre invalidation_mutex is locked
+    void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
+        const VAddr addr_end = addr + size;
+        const u64 page_end = addr_end >> PAGE_SHIFT;
+        for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
+            const auto it = invalidation_cache.find(page);
+            if (it == invalidation_cache.end()) {
+                continue;
+            }
+
+            std::vector<Entry*>& entries = it->second;
+            InvalidatePageEntries(entries, addr, addr_end);
+
+            // If there's nothing else in this page, remove it to avoid overpopulating the hash map.
+            if (entries.empty()) {
+                invalidation_cache.erase(it);
+            }
+        }
+    }
+
+    /// @brief Remove shaders marked for deletion
+    /// @pre invalidation_mutex is locked
+    void RemovePendingShaders() {
+        if (marked_for_removal.empty()) {
+            return;
+        }
+        std::scoped_lock lock{lookup_mutex};
+
+        std::vector<T*> removed_shaders;
+        removed_shaders.reserve(marked_for_removal.size());
+
+        for (Entry* const entry : marked_for_removal) {
+            if (lookup_cache.erase(entry->addr_start) > 0) {
+                removed_shaders.push_back(entry->data);
+            }
+        }
+        marked_for_removal.clear();
+
+        if (!removed_shaders.empty()) {
+            RemoveShadersFromStorage(std::move(removed_shaders));
+        }
+    }
+
+    /// @brief Invalidates entries in a given range for the passed page
+    /// @param entries Vector of entries in the page, it will be modified on overlaps
+    /// @param addr Start address of the invalidation
+    /// @param addr_end Non-inclusive end address of the invalidation
+    /// @pre invalidation_mutex is locked
+    void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
+        auto it = entries.begin();
+        while (it != entries.end()) {
+            Entry* const entry = *it;
+            if (!entry->Overlaps(addr, addr_end)) {
+                ++it;
+                continue;
+            }
+            UnmarkMemory(entry);
+            marked_for_removal.push_back(entry);
+
+            it = entries.erase(it);
+        }
+    }
+
+    /// @brief Unmarks an entry from the rasterizer cache
+    /// @param entry Entry to unmark from memory
+    void UnmarkMemory(Entry* entry) {
+        if (!entry->is_memory_marked) {
+            return;
+        }
+        entry->is_memory_marked = false;
+
+        const VAddr addr = entry->addr_start;
+        const std::size_t size = entry->addr_end - addr;
+        rasterizer.UpdatePagesCachedCount(addr, size, -1);
+    }
+
+    /// @brief Removes a vector of shaders from a list
+    /// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
+    /// @pre invalidation_mutex is locked
+    /// @pre lookup_mutex is locked
+    void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
+        // Remove duplicates
+        std::sort(removed_shaders.begin(), removed_shaders.end());
+        removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
+                              removed_shaders.end());
+
+        // Now that there are no duplicates, we can notify removals
+        for (T* const shader : removed_shaders) {
+            OnShaderRemoval(shader);
+        }
+
+        // Remove them from the cache
+        const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
+            return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
+                   removed_shaders.end();
+        };
+        storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
+    }
+
+    /// @brief Creates a new entry in the lookup cache and returns its pointer
+    /// @pre lookup_mutex is locked
+    Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
+        auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
+        Entry* const entry_pointer = entry.get();
+
+        lookup_cache.emplace(addr, std::move(entry));
+        return entry_pointer;
+    }
+
+    VideoCore::RasterizerInterface& rasterizer;
+
+    mutable std::mutex lookup_mutex;
+    std::mutex invalidation_mutex;
+
+    std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
+    std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
+    std::vector<std::unique_ptr<T>> storage;
+    std::vector<Entry*> marked_for_removal;
+};
+
+} // namespace VideoCommon
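
To see how the new header is meant to be consumed: a backend derives from ShaderCache, registers each shader with its guest address range, and lets the 16 KiB pages implied by PAGE_SHIFT = 14 drive invalidation. A hypothetical subclass (MyShader and MyShaderCache are illustrative names, not from the tree; the base-class calls are the ones declared above):

    #include <memory>

    #include "video_core/shader_cache.h"

    struct MyShader {
        // Compiled module handle, entry point, etc. would live here.
    };

    class MyShaderCache final : public VideoCommon::ShaderCache<MyShader> {
    public:
        explicit MyShaderCache(VideoCore::RasterizerInterface& rasterizer)
            : ShaderCache{rasterizer} {}

        MyShader* GetOrBuild(VAddr addr, std::size_t size_in_bytes) {
            if (MyShader* const shader = TryGet(addr)) {
                return shader; // Lookup is by exact start address, not by range
            }
            auto shader = std::make_unique<MyShader>();
            MyShader* const pointer = shader.get();
            Register(std::move(shader), addr, size_in_bytes);
            return pointer;
        }

    protected:
        void OnShaderRemoval(MyShader*) override {
            // Release backend resources here; called with both cache mutexes held.
        }
    };

A CPU write then flows through OnCPUWrite, which only unmarks the touched pages; the next SyncGuestHost or InvalidateRegion call performs the actual removal, keeping deallocation off the hot write path.
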
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7032e0059..f476f03b0 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
     ComponentType alpha_component;
     bool is_srgb;
 };
-constexpr std::array<Table, 77> DefinitionTable = {{
+constexpr std::array<Table, 78> DefinitionTable = {{
     {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
     {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
     {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{
     {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
     {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
     {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
+    {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
     {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
 
     {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 715f39d0d..94d3a6ae5 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
 
     // Use an extra temporal buffer
     auto& tmp_buffer = staging_cache.GetBuffer(1);
-    // Special case for 3D Texture Segments
-    const bool must_read_current_data =
-        params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D;
     tmp_buffer.resize(guest_memory_size);
     host_ptr = tmp_buffer.data();
-    if (must_read_current_data) {
+
+    if (params.target == SurfaceTarget::Texture3D) {
+        // Special case for 3D texture segments
         memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
     }
 
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 79e10ffbb..173f2edba 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -217,8 +217,8 @@ public:
     }
 
     bool IsProtected() const {
-        // Only 3D Slices are to be protected
-        return is_target && params.block_depth > 0;
+        // Only 3D slices are to be protected
+        return is_target && params.target == SurfaceTarget::Texture3D;
     }
 
     bool IsRenderTarget() const {
@@ -250,6 +250,11 @@ public:
         return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
     }
 
+    TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) {
+        return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth,
+                                  base_level, num_levels));
+    }
+
     std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
                                               const GPUVAddr view_addr,
                                               const std::size_t candidate_size, const u32 mipmap,
@@ -272,8 +277,8 @@ public:
     std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
                                      const std::size_t candidate_size) {
         if (params.target == SurfaceTarget::Texture3D ||
-            (params.num_levels == 1 && !params.is_layered) ||
-            view_params.target == SurfaceTarget::Texture3D) {
+            view_params.target == SurfaceTarget::Texture3D ||
+            (params.num_levels == 1 && !params.is_layered)) {
             return {};
         }
         const auto layer_mipmap{GetLayerMipmap(view_addr)};
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 884fabffe..0b2b2b8c4 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
     params.num_levels = 1;
     params.emulated_levels = 1;
 
-    const bool is_layered = config.layers > 1 && params.block_depth == 0;
-    params.is_layered = is_layered;
-    params.depth = is_layered ? config.layers.Value() : 1;
-    params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
+    if (config.memory_layout.is_3d != 0) {
+        params.depth = config.layers.Value();
+        params.is_layered = false;
+        params.target = SurfaceTarget::Texture3D;
+    } else if (config.layers > 1) {
+        params.depth = config.layers.Value();
+        params.is_layered = true;
+        params.target = SurfaceTarget::Texture2DArray;
+    } else {
+        params.depth = 1;
+        params.is_layered = false;
+        params.target = SurfaceTarget::Texture2D;
+    }
     return params;
 }
 
@@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
     params.width = config.width;
     params.height = config.height;
     params.pitch = config.pitch;
-    // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
+    // TODO(Rodrigo): Try to guess texture arrays from parameters
     params.target = SurfaceTarget::Texture2D;
     params.depth = 1;
     params.num_levels = 1;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d6efc34b2..b543fc8c0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
 #include <unordered_map>
 #include <vector>
 
+#include <boost/container/small_vector.hpp>
 #include <boost/icl/interval_map.hpp>
 #include <boost/range/iterator_range.hpp>
 
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
 
 template <typename TSurface, typename TView>
 class TextureCache {
+    using VectorSurface = boost::container::small_vector<TSurface, 1>;
 
 public:
     void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -296,30 +298,30 @@
         const GPUVAddr src_gpu_addr = src_config.Address();
         const GPUVAddr dst_gpu_addr = dst_config.Address();
         DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
-        const std::optional<VAddr> dst_cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr);
-        const std::optional<VAddr> src_cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr);
-        std::pair<TSurface, TView> dst_surface =
-            GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
-        std::pair<TSurface, TView> src_surface =
-            GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false);
-        ImageBlit(src_surface.second, dst_surface.second, copy_config);
+
+        const auto& memory_manager = system.GPU().MemoryManager();
+        const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
+        const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
+        std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
+        TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
+        ImageBlit(src_surface, dst_surface.second, copy_config);
         dst_surface.first->MarkAsModified(true, Tick());
     }
 
-    TSurface TryFindFramebufferSurface(VAddr addr) {
+    TSurface TryFindFramebufferSurface(VAddr addr) const {
         if (!addr) {
             return nullptr;
         }
         const VAddr page = addr >> registry_page_bits;
-        std::vector<TSurface>& list = registry[page];
-        for (auto& surface : list) {
-            if (surface->GetCpuAddr() == addr) {
-                return surface;
-            }
+        const auto it = registry.find(page);
+        if (it == registry.end()) {
+            return nullptr;
         }
-        return nullptr;
+        const auto& list = it->second;
+        const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
+            return surface->GetCpuAddr() == addr;
+        });
+        return found != list.end() ? *found : nullptr;
     }
 
     u64 Tick() {
@@ -498,18 +500,18 @@ private:
      * @param untopological Indicates to the recycler that the texture has no way
     * to match the overlaps due to topological reasons.
      **/
-    RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
+    RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
                                  const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
         if (Settings::IsGPULevelExtreme()) {
             return RecycleStrategy::Flush;
         }
         // 3D Textures decision
-        if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
+        if (params.target == SurfaceTarget::Texture3D) {
            return RecycleStrategy::Flush;
         }
         for (const auto& s : overlaps) {
             const auto& s_params = s->GetSurfaceParams();
-            if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
+            if (s_params.target == SurfaceTarget::Texture3D) {
                 return RecycleStrategy::Flush;
             }
         }
@@ -538,9 +540,8 @@ private:
      * @param untopological Indicates to the recycler that the texture has no way to match the
      * overlaps due to topological reasons.
      **/
-    std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
-                                              const SurfaceParams& params, const GPUVAddr gpu_addr,
-                                              const bool preserve_contents,
+    std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
+                                              const GPUVAddr gpu_addr, const bool preserve_contents,
                                               const MatchTopologyResult untopological) {
         const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
         for (auto& surface : overlaps) {
@@ -650,47 +651,65 @@ private:
      * @param params The parameters on the new surface.
      * @param gpu_addr The starting address of the new surface.
      **/
-    std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
+    std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
                                                                     const SurfaceParams& params,
-                                                                    const GPUVAddr gpu_addr) {
+                                                                    GPUVAddr gpu_addr) {
         if (params.target == SurfaceTarget::Texture3D) {
-            return {};
+            return std::nullopt;
         }
-        bool modified = false;
+        const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
         TSurface new_surface = GetUncachedSurface(gpu_addr, params);
-        u32 passed_tests = 0;
+
| 663 | if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { | ||
| 664 | LoadSurface(new_surface); | ||
| 665 | for (const auto& surface : overlaps) { | ||
| 666 | Unregister(surface); | ||
| 667 | } | ||
| 668 | Register(new_surface); | ||
| 669 | return {{new_surface, new_surface->GetMainView()}}; | ||
| 670 | } | ||
| 671 | |||
| 672 | std::size_t passed_tests = 0; | ||
| 662 | for (auto& surface : overlaps) { | 673 | for (auto& surface : overlaps) { |
| 663 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | 674 | const SurfaceParams& src_params = surface->GetSurfaceParams(); |
| 664 | if (src_params.is_layered || src_params.num_levels > 1) { | 675 | const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; |
| 665 | // We send this cases to recycle as they are more complex to handle | ||
| 666 | return {}; | ||
| 667 | } | ||
| 668 | const std::size_t candidate_size = surface->GetSizeInBytes(); | ||
| 669 | auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; | ||
| 670 | if (!mipmap_layer) { | 676 | if (!mipmap_layer) { |
| 671 | continue; | 677 | continue; |
| 672 | } | 678 | } |
| 673 | const auto [layer, mipmap] = *mipmap_layer; | 679 | const auto [base_layer, base_mipmap] = *mipmap_layer; |
| 674 | if (new_surface->GetMipmapSize(mipmap) != candidate_size) { | 680 | if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { |
| 675 | continue; | 681 | continue; |
| 676 | } | 682 | } |
| 677 | modified |= surface->IsModified(); | 683 | ++passed_tests; |
| 678 | // Now we got all the data set up | 684 | |
| 679 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); | 685 | // Copy all mipmaps and layers |
| 680 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | 686 | const u32 block_width = params.GetDefaultBlockWidth(); |
| 681 | const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); | 687 | const u32 block_height = params.GetDefaultBlockHeight(); |
| 682 | passed_tests++; | 688 | for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { |
| 683 | ImageCopy(surface, new_surface, copy_params); | 689 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); |
| 690 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | ||
| 691 | if (width < block_width || height < block_height) { | ||
| 692 | // Current APIs forbid copying small compressed textures, avoid errors | ||
| 693 | break; | ||
| 694 | } | ||
| 695 | const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, | ||
| 696 | src_params.depth); | ||
| 697 | ImageCopy(surface, new_surface, copy_params); | ||
| 698 | } | ||
| 684 | } | 699 | } |
| 685 | if (passed_tests == 0) { | 700 | if (passed_tests == 0) { |
| 686 | return {}; | 701 | return std::nullopt; |
| 702 | } | ||
| 703 | if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { | ||
| 687 | // In Accurate GPU all tests should pass, else we recycle | 704 | // In Accurate GPU all tests should pass, else we recycle |
| 688 | } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { | 705 | return std::nullopt; |
| 689 | return {}; | ||
| 690 | } | 706 | } |
| 707 | |||
| 708 | const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); | ||
| 691 | for (const auto& surface : overlaps) { | 709 | for (const auto& surface : overlaps) { |
| 692 | Unregister(surface); | 710 | Unregister(surface); |
| 693 | } | 711 | } |
| 712 | |||
| 694 | new_surface->MarkAsModified(modified, Tick()); | 713 | new_surface->MarkAsModified(modified, Tick()); |
| 695 | Register(new_surface); | 714 | Register(new_surface); |
| 696 | return {{new_surface, new_surface->GetMainView()}}; | 715 | return {{new_surface, new_surface->GetMainView()}}; |
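`TryReconstructSurface` now takes a fast path when no overlap was written by the GPU: `std::none_of` guards a plain reload from guest memory, and `std::any_of` recomputes the modified flag only on the slower copy path. A hedged sketch of the predicate pattern, with a hypothetical surface type:

```cpp
#include <algorithm>
#include <vector>

// Hypothetical stand-in for the cache's surface objects.
struct Surface {
    bool modified = false;
    bool IsModified() const { return modified; }
};

// True when every overlap is clean, meaning the reconstructed surface can be
// loaded straight from guest memory instead of stitched together via ImageCopy.
bool CanLoadDirectly(const std::vector<Surface>& overlaps) {
    return std::none_of(overlaps.begin(), overlaps.end(),
                        [](const Surface& s) { return s.IsModified(); });
}
```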
| @@ -708,53 +727,11 @@ private: | |||
| 708 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | 727 | * @param preserve_contents Indicates that the new surface should be loaded from memory or |
| 709 | * left blank. | 728 | * left blank. |
| 710 | */ | 729 | */ |
| 711 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | 730 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, |
| 712 | const SurfaceParams& params, | 731 | const SurfaceParams& params, |
| 713 | const GPUVAddr gpu_addr, | 732 | GPUVAddr gpu_addr, VAddr cpu_addr, |
| 714 | const VAddr cpu_addr, | ||
| 715 | bool preserve_contents) { | 733 | bool preserve_contents) { |
| 716 | if (params.target == SurfaceTarget::Texture3D) { | 734 | if (params.target != SurfaceTarget::Texture3D) { |
| 717 | bool failed = false; | ||
| 718 | if (params.num_levels > 1) { | ||
| 719 | // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach | ||
| 720 | return std::nullopt; | ||
| 721 | } | ||
| 722 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 723 | bool modified = false; | ||
| 724 | for (auto& surface : overlaps) { | ||
| 725 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 726 | if (src_params.target != SurfaceTarget::Texture2D) { | ||
| 727 | failed = true; | ||
| 728 | break; | ||
| 729 | } | ||
| 730 | if (src_params.height != params.height) { | ||
| 731 | failed = true; | ||
| 732 | break; | ||
| 733 | } | ||
| 734 | if (src_params.block_depth != params.block_depth || | ||
| 735 | src_params.block_height != params.block_height) { | ||
| 736 | failed = true; | ||
| 737 | break; | ||
| 738 | } | ||
| 739 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | ||
| 740 | const auto offsets = params.GetBlockOffsetXYZ(offset); | ||
| 741 | const auto z = std::get<2>(offsets); | ||
| 742 | modified |= surface->IsModified(); | ||
| 743 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, | ||
| 744 | 1); | ||
| 745 | ImageCopy(surface, new_surface, copy_params); | ||
| 746 | } | ||
| 747 | if (failed) { | ||
| 748 | return std::nullopt; | ||
| 749 | } | ||
| 750 | for (const auto& surface : overlaps) { | ||
| 751 | Unregister(surface); | ||
| 752 | } | ||
| 753 | new_surface->MarkAsModified(modified, Tick()); | ||
| 754 | Register(new_surface); | ||
| 755 | auto view = new_surface->GetMainView(); | ||
| 756 | return {{std::move(new_surface), view}}; | ||
| 757 | } else { | ||
| 758 | for (const auto& surface : overlaps) { | 735 | for (const auto& surface : overlaps) { |
| 759 | if (!surface->MatchTarget(params.target)) { | 736 | if (!surface->MatchTarget(params.target)) { |
| 760 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | 737 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { |
| @@ -770,11 +747,60 @@ private: | |||
| 770 | continue; | 747 | continue; |
| 771 | } | 748 | } |
| 772 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | 749 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { |
| 773 | return {{surface, surface->GetMainView()}}; | 750 | return std::make_pair(surface, surface->GetMainView()); |
| 774 | } | 751 | } |
| 775 | } | 752 | } |
| 776 | return InitializeSurface(gpu_addr, params, preserve_contents); | 753 | return InitializeSurface(gpu_addr, params, preserve_contents); |
| 777 | } | 754 | } |
| 755 | |||
| 756 | if (params.num_levels > 1) { | ||
| 757 | // We can't handle mipmaps in 3D textures yet, better fall back to the LLE approach | ||
| 758 | return std::nullopt; | ||
| 759 | } | ||
| 760 | |||
| 761 | if (overlaps.size() == 1) { | ||
| 762 | const auto& surface = overlaps[0]; | ||
| 763 | const SurfaceParams& overlap_params = surface->GetSurfaceParams(); | ||
| 764 | // Don't attempt to render to textures with more than one level for now | ||
| 765 | // The texture has to be to the right of the sampled address if we want to render to it | ||
| 766 | if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { | ||
| 767 | const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); | ||
| 768 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | ||
| 769 | if (slice < overlap_params.depth) { | ||
| 770 | auto view = surface->Emplace3DView(slice, params.depth, 0, 1); | ||
| 771 | return std::make_pair(std::move(surface), std::move(view)); | ||
| 772 | } | ||
| 773 | } | ||
| 774 | } | ||
| 775 | |||
| 776 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 777 | bool modified = false; | ||
| 778 | |||
| 779 | for (auto& surface : overlaps) { | ||
| 780 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 781 | if (src_params.target != SurfaceTarget::Texture2D || | ||
| 782 | src_params.height != params.height || | ||
| 783 | src_params.block_depth != params.block_depth || | ||
| 784 | src_params.block_height != params.block_height) { | ||
| 785 | return std::nullopt; | ||
| 786 | } | ||
| 787 | modified |= surface->IsModified(); | ||
| 788 | |||
| 789 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | ||
| 790 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | ||
| 791 | const u32 width = params.width; | ||
| 792 | const u32 height = params.height; | ||
| 793 | const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); | ||
| 794 | ImageCopy(surface, new_surface, copy_params); | ||
| 795 | } | ||
| 796 | for (const auto& surface : overlaps) { | ||
| 797 | Unregister(surface); | ||
| 798 | } | ||
| 799 | new_surface->MarkAsModified(modified, Tick()); | ||
| 800 | Register(new_surface); | ||
| 801 | |||
| 802 | TView view = new_surface->GetMainView(); | ||
| 803 | return std::make_pair(std::move(new_surface), std::move(view)); | ||
| 778 | } | 804 | } |
| 779 | 805 | ||
| 780 | /** | 806 | /** |
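`Manage3DSurfaces` derives which Z slice of the 3D texture a 2D overlap maps to by feeding the CPU-address delta through `GetBlockOffsetXYZ`. As a rough illustration only, here is the same idea for a plain linear layout (real surfaces are block-linear swizzled, which is exactly what `GetBlockOffsetXYZ` accounts for):

```cpp
#include <cstdint>

// Simplified assumption: a linearly laid out 3D texture, where each depth
// slice occupies width * height * bytes_per_pixel bytes. The slice index is
// then just the byte offset divided by the slice size.
std::uint32_t SliceFromOffset(std::uint32_t offset, std::uint32_t width,
                              std::uint32_t height, std::uint32_t bytes_per_pixel) {
    const std::uint32_t slice_size = width * height * bytes_per_pixel;
    return offset / slice_size;
}
```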
| @@ -810,7 +836,7 @@ private: | |||
| 810 | TSurface& current_surface = iter->second; | 836 | TSurface& current_surface = iter->second; |
| 811 | const auto topological_result = current_surface->MatchesTopology(params); | 837 | const auto topological_result = current_surface->MatchesTopology(params); |
| 812 | if (topological_result != MatchTopologyResult::FullMatch) { | 838 | if (topological_result != MatchTopologyResult::FullMatch) { |
| 813 | std::vector<TSurface> overlaps{current_surface}; | 839 | VectorSurface overlaps{current_surface}; |
| 814 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 840 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, |
| 815 | topological_result); | 841 | topological_result); |
| 816 | } | 842 | } |
| @@ -852,7 +878,7 @@ private: | |||
| 852 | } | 878 | } |
| 853 | } | 879 | } |
| 854 | 880 | ||
| 855 | // Check if it's a 3D texture | 881 | // Manage 3D textures |
| 856 | if (params.block_depth > 0) { | 882 | if (params.block_depth > 0) { |
| 857 | auto surface = | 883 | auto surface = |
| 858 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); | 884 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); |
| @@ -868,12 +894,9 @@ private: | |||
| 868 | // two things either the candidate surface is a supertexture of the overlap | 894 | // two things either the candidate surface is a supertexture of the overlap |
| 869 | // or they don't match in any known way. | 895 | // or they don't match in any known way. |
| 870 | if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { | 896 | if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { |
| 871 | if (current_surface->GetGpuAddr() == gpu_addr) { | 897 | const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); |
| 872 | std::optional<std::pair<TSurface, TView>> view = | 898 | if (view) { |
| 873 | TryReconstructSurface(overlaps, params, gpu_addr); | 899 | return *view; |
| 874 | if (view) { | ||
| 875 | return *view; | ||
| 876 | } | ||
| 877 | } | 900 | } |
| 878 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 901 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, |
| 879 | MatchTopologyResult::FullMatch); | 902 | MatchTopologyResult::FullMatch); |
| @@ -991,7 +1014,9 @@ private: | |||
| 991 | params.target = target; | 1014 | params.target = target; |
| 992 | params.is_tiled = false; | 1015 | params.is_tiled = false; |
| 993 | params.srgb_conversion = false; | 1016 | params.srgb_conversion = false; |
| 994 | params.is_layered = false; | 1017 | params.is_layered = |
| 1018 | target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || | ||
| 1019 | target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; | ||
| 995 | params.block_width = 0; | 1020 | params.block_width = 0; |
| 996 | params.block_height = 0; | 1021 | params.block_height = 0; |
| 997 | params.block_depth = 0; | 1022 | params.block_depth = 0; |
| @@ -1124,23 +1149,25 @@ private: | |||
| 1124 | } | 1149 | } |
| 1125 | } | 1150 | } |
| 1126 | 1151 | ||
| 1127 | std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | 1152 | VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { |
| 1128 | if (size == 0) { | 1153 | if (size == 0) { |
| 1129 | return {}; | 1154 | return {}; |
| 1130 | } | 1155 | } |
| 1131 | const VAddr cpu_addr_end = cpu_addr + size; | 1156 | const VAddr cpu_addr_end = cpu_addr + size; |
| 1132 | VAddr start = cpu_addr >> registry_page_bits; | ||
| 1133 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | 1157 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; |
| 1134 | std::vector<TSurface> surfaces; | 1158 | VectorSurface surfaces; |
| 1135 | while (start <= end) { | 1159 | for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { |
| 1136 | std::vector<TSurface>& list = registry[start]; | 1160 | const auto it = registry.find(start); |
| 1137 | for (auto& surface : list) { | 1161 | if (it == registry.end()) { |
| 1138 | if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { | 1162 | continue; |
| 1139 | surface->MarkAsPicked(true); | 1163 | } |
| 1140 | surfaces.push_back(surface); | 1164 | for (auto& surface : it->second) { |
| 1165 | if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { | ||
| 1166 | continue; | ||
| 1141 | } | 1167 | } |
| 1168 | surface->MarkAsPicked(true); | ||
| 1169 | surfaces.push_back(surface); | ||
| 1142 | } | 1170 | } |
| 1143 | start++; | ||
| 1144 | } | 1171 | } |
| 1145 | for (auto& surface : surfaces) { | 1172 | for (auto& surface : surfaces) { |
| 1146 | surface->MarkAsPicked(false); | 1173 | surface->MarkAsPicked(false); |
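`GetSurfacesInRegion` and `TryFindFramebufferSurface` now look pages up with `registry.find` rather than `operator[]`, which also lets the latter be `const`; `operator[]` would silently insert an empty bucket for every untouched page it probes. A standalone sketch of the page walk, with hypothetical types and page size:

```cpp
#include <cstdint>
#include <unordered_map>
#include <vector>

using VAddr = std::uint64_t;
constexpr std::size_t page_bits = 20; // page granularity chosen for illustration

// Collect the entries of every page bucket overlapping [addr, addr + size).
// find() leaves the map untouched; operator[] would create empty buckets.
std::vector<int> CollectInRegion(
    const std::unordered_map<VAddr, std::vector<int>>& registry, VAddr addr,
    std::size_t size) {
    std::vector<int> result;
    if (size == 0) {
        return result;
    }
    const VAddr end = (addr + size - 1) >> page_bits;
    for (VAddr page = addr >> page_bits; page <= end; ++page) {
        const auto it = registry.find(page);
        if (it == registry.end()) {
            continue;
        }
        result.insert(result.end(), it->second.begin(), it->second.end());
    }
    return result;
}
```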
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 1adf8932b..1f5e43043 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -106,6 +106,9 @@ public: | |||
| 106 | format.setVersion(4, 3); | 106 | format.setVersion(4, 3); |
| 107 | format.setProfile(QSurfaceFormat::CompatibilityProfile); | 107 | format.setProfile(QSurfaceFormat::CompatibilityProfile); |
| 108 | format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); | 108 | format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); |
| 109 | if (Settings::values.renderer_debug) { | ||
| 110 | format.setOption(QSurfaceFormat::FormatOption::DebugContext); | ||
| 111 | } | ||
| 109 | // TODO: expose a setting for buffer value (ie default/single/double/triple) | 112 | // TODO: expose a setting for buffer value (ie default/single/double/triple) |
| 110 | format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); | 113 | format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); |
| 111 | format.setSwapInterval(0); | 114 | format.setSwapInterval(0); |
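With `QSurfaceFormat::DebugContext` requested, driver debug messages can be consumed through Qt's `QOpenGLDebugLogger`. A minimal sketch, assuming a current context that was actually created with the debug option:

```cpp
#include <QDebug>
#include <QOpenGLContext>
#include <QOpenGLDebugLogger>

// Attach a logger to a current QOpenGLContext created with the
// QSurfaceFormat::DebugContext option; initialize() fails without GL_KHR_debug.
void AttachGLLogger(QOpenGLContext* context) {
    auto* logger = new QOpenGLDebugLogger(context);
    if (!logger->initialize()) {
        return;
    }
    QObject::connect(logger, &QOpenGLDebugLogger::messageLogged,
                     [](const QOpenGLDebugMessage& message) { qDebug() << message; });
    logger->startLogging();
}
```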
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 27775701d..32c81dc70 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -533,6 +533,8 @@ void Config::ReadDebuggingValues() { | |||
| 533 | Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); | 533 | Settings::values.quest_flag = ReadSetting(QStringLiteral("quest_flag"), false).toBool(); |
| 534 | Settings::values.disable_cpu_opt = | 534 | Settings::values.disable_cpu_opt = |
| 535 | ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool(); | 535 | ReadSetting(QStringLiteral("disable_cpu_opt"), false).toBool(); |
| 536 | Settings::values.disable_macro_jit = | ||
| 537 | ReadSetting(QStringLiteral("disable_macro_jit"), false).toBool(); | ||
| 536 | 538 | ||
| 537 | qt_config->endGroup(); | 539 | qt_config->endGroup(); |
| 538 | } | 540 | } |
| @@ -629,13 +631,11 @@ void Config::ReadRendererValues() { | |||
| 629 | static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); | 631 | static_cast<Settings::RendererBackend>(ReadSetting(QStringLiteral("backend"), 0).toInt()); |
| 630 | Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); | 632 | Settings::values.renderer_debug = ReadSetting(QStringLiteral("debug"), false).toBool(); |
| 631 | Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); | 633 | Settings::values.vulkan_device = ReadSetting(QStringLiteral("vulkan_device"), 0).toInt(); |
| 632 | Settings::values.resolution_factor = | ||
| 633 | ReadSetting(QStringLiteral("resolution_factor"), 1.0).toFloat(); | ||
| 634 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); | 634 | Settings::values.aspect_ratio = ReadSetting(QStringLiteral("aspect_ratio"), 0).toInt(); |
| 635 | Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); | 635 | Settings::values.max_anisotropy = ReadSetting(QStringLiteral("max_anisotropy"), 0).toInt(); |
| 636 | Settings::values.use_frame_limit = | 636 | Settings::values.use_frame_limit = |
| 637 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); | 637 | ReadSetting(QStringLiteral("use_frame_limit"), true).toBool(); |
| 638 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); | 638 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toUInt(); |
| 639 | Settings::values.use_disk_shader_cache = | 639 | Settings::values.use_disk_shader_cache = |
| 640 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); | 640 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); |
| 641 | const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); | 641 | const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); |
| @@ -643,6 +643,8 @@ void Config::ReadRendererValues() { | |||
| 643 | Settings::values.use_asynchronous_gpu_emulation = | 643 | Settings::values.use_asynchronous_gpu_emulation = |
| 644 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); | 644 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); |
| 645 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); | 645 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); |
| 646 | Settings::values.use_assembly_shaders = | ||
| 647 | ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool(); | ||
| 646 | Settings::values.use_fast_gpu_time = | 648 | Settings::values.use_fast_gpu_time = |
| 647 | ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); | 649 | ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); |
| 648 | Settings::values.force_30fps_mode = | 650 | Settings::values.force_30fps_mode = |
| @@ -718,8 +720,6 @@ void Config::ReadUIValues() { | |||
| 718 | .toString(); | 720 | .toString(); |
| 719 | UISettings::values.enable_discord_presence = | 721 | UISettings::values.enable_discord_presence = |
| 720 | ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); | 722 | ReadSetting(QStringLiteral("enable_discord_presence"), true).toBool(); |
| 721 | UISettings::values.screenshot_resolution_factor = | ||
| 722 | static_cast<u16>(ReadSetting(QStringLiteral("screenshot_resolution_factor"), 0).toUInt()); | ||
| 723 | UISettings::values.select_user_on_boot = | 723 | UISettings::values.select_user_on_boot = |
| 724 | ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); | 724 | ReadSetting(QStringLiteral("select_user_on_boot"), false).toBool(); |
| 725 | 725 | ||
| @@ -1009,6 +1009,7 @@ void Config::SaveDebuggingValues() { | |||
| 1009 | WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); | 1009 | WriteSetting(QStringLiteral("dump_nso"), Settings::values.dump_nso, false); |
| 1010 | WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); | 1010 | WriteSetting(QStringLiteral("quest_flag"), Settings::values.quest_flag, false); |
| 1011 | WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false); | 1011 | WriteSetting(QStringLiteral("disable_cpu_opt"), Settings::values.disable_cpu_opt, false); |
| 1012 | WriteSetting(QStringLiteral("disable_macro_jit"), Settings::values.disable_macro_jit, false); | ||
| 1012 | 1013 | ||
| 1013 | qt_config->endGroup(); | 1014 | qt_config->endGroup(); |
| 1014 | } | 1015 | } |
| @@ -1077,8 +1078,6 @@ void Config::SaveRendererValues() { | |||
| 1077 | WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); | 1078 | WriteSetting(QStringLiteral("backend"), static_cast<int>(Settings::values.renderer_backend), 0); |
| 1078 | WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); | 1079 | WriteSetting(QStringLiteral("debug"), Settings::values.renderer_debug, false); |
| 1079 | WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); | 1080 | WriteSetting(QStringLiteral("vulkan_device"), Settings::values.vulkan_device, 0); |
| 1080 | WriteSetting(QStringLiteral("resolution_factor"), | ||
| 1081 | static_cast<double>(Settings::values.resolution_factor), 1.0); | ||
| 1082 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); | 1081 | WriteSetting(QStringLiteral("aspect_ratio"), Settings::values.aspect_ratio, 0); |
| 1083 | WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); | 1082 | WriteSetting(QStringLiteral("max_anisotropy"), Settings::values.max_anisotropy, 0); |
| 1084 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); | 1083 | WriteSetting(QStringLiteral("use_frame_limit"), Settings::values.use_frame_limit, true); |
| @@ -1090,6 +1089,8 @@ void Config::SaveRendererValues() { | |||
| 1090 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), | 1089 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), |
| 1091 | Settings::values.use_asynchronous_gpu_emulation, false); | 1090 | Settings::values.use_asynchronous_gpu_emulation, false); |
| 1092 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | 1091 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); |
| 1092 | WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders, | ||
| 1093 | false); | ||
| 1093 | WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); | 1094 | WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); |
| 1094 | WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); | 1095 | WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); |
| 1095 | 1096 | ||
| @@ -1152,8 +1153,6 @@ void Config::SaveUIValues() { | |||
| 1152 | QString::fromUtf8(UISettings::themes[0].second)); | 1153 | QString::fromUtf8(UISettings::themes[0].second)); |
| 1153 | WriteSetting(QStringLiteral("enable_discord_presence"), | 1154 | WriteSetting(QStringLiteral("enable_discord_presence"), |
| 1154 | UISettings::values.enable_discord_presence, true); | 1155 | UISettings::values.enable_discord_presence, true); |
| 1155 | WriteSetting(QStringLiteral("screenshot_resolution_factor"), | ||
| 1156 | UISettings::values.screenshot_resolution_factor, 0); | ||
| 1157 | WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, | 1156 | WriteSetting(QStringLiteral("select_user_on_boot"), UISettings::values.select_user_on_boot, |
| 1158 | false); | 1157 | false); |
| 1159 | 1158 | ||
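Each setting keeps the read and write sides symmetric: a value is read with a default, and later written back against that same default. A simplified sketch of the pattern with hypothetical helpers (yuzu's own `ReadSetting`/`WriteSetting` wrap `QSettings` similarly; the skip-if-default behavior below is an assumption made for illustration):

```cpp
#include <QSettings>
#include <QString>
#include <QVariant>

// Hypothetical stand-ins for the config helpers used above.
QVariant ReadSetting(QSettings& settings, const QString& name, const QVariant& def) {
    return settings.value(name, def);
}

void WriteSetting(QSettings& settings, const QString& name, const QVariant& value,
                  const QVariant& def) {
    // Keep the file minimal: only persist values that differ from the default.
    if (value == def) {
        settings.remove(name);
    } else {
        settings.setValue(name, value);
    }
}
```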
diff --git a/src/yuzu/configuration/configure_debug.cpp b/src/yuzu/configuration/configure_debug.cpp index c2026763e..2c77441fd 100644 --- a/src/yuzu/configuration/configure_debug.cpp +++ b/src/yuzu/configuration/configure_debug.cpp | |||
| @@ -39,6 +39,8 @@ void ConfigureDebug::SetConfiguration() { | |||
| 39 | ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt); | 39 | ui->disable_cpu_opt->setChecked(Settings::values.disable_cpu_opt); |
| 40 | ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); | 40 | ui->enable_graphics_debugging->setEnabled(!Core::System::GetInstance().IsPoweredOn()); |
| 41 | ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); | 41 | ui->enable_graphics_debugging->setChecked(Settings::values.renderer_debug); |
| 42 | ui->disable_macro_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn()); | ||
| 43 | ui->disable_macro_jit->setChecked(Settings::values.disable_macro_jit); | ||
| 42 | } | 44 | } |
| 43 | 45 | ||
| 44 | void ConfigureDebug::ApplyConfiguration() { | 46 | void ConfigureDebug::ApplyConfiguration() { |
| @@ -51,6 +53,7 @@ void ConfigureDebug::ApplyConfiguration() { | |||
| 51 | Settings::values.quest_flag = ui->quest_flag->isChecked(); | 53 | Settings::values.quest_flag = ui->quest_flag->isChecked(); |
| 52 | Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked(); | 54 | Settings::values.disable_cpu_opt = ui->disable_cpu_opt->isChecked(); |
| 53 | Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); | 55 | Settings::values.renderer_debug = ui->enable_graphics_debugging->isChecked(); |
| 56 | Settings::values.disable_macro_jit = ui->disable_macro_jit->isChecked(); | ||
| 54 | Debugger::ToggleConsole(); | 57 | Debugger::ToggleConsole(); |
| 55 | Log::Filter filter; | 58 | Log::Filter filter; |
| 56 | filter.ParseFilterString(Settings::values.log_filter); | 59 | filter.ParseFilterString(Settings::values.log_filter); |
diff --git a/src/yuzu/configuration/configure_debug.ui b/src/yuzu/configuration/configure_debug.ui index e0d4c4a44..46f0208c6 100644 --- a/src/yuzu/configuration/configure_debug.ui +++ b/src/yuzu/configuration/configure_debug.ui | |||
| @@ -148,6 +148,19 @@ | |||
| 148 | </property> | 148 | </property> |
| 149 | </widget> | 149 | </widget> |
| 150 | </item> | 150 | </item> |
| 151 | <item> | ||
| 152 | <widget class="QCheckBox" name="disable_macro_jit"> | ||
| 153 | <property name="enabled"> | ||
| 154 | <bool>true</bool> | ||
| 155 | </property> | ||
| 156 | <property name="whatsThis"> | ||
| 157 | <string>When checked, it disables the macro Just-In-Time compiler. Enabling this makes games run slower</string> | ||
| 158 | </property> | ||
| 159 | <property name="text"> | ||
| 160 | <string>Disable Macro JIT</string> | ||
| 161 | </property> | ||
| 162 | </widget> | ||
| 163 | </item> | ||
| 151 | </layout> | 164 | </layout> |
| 152 | </widget> | 165 | </widget> |
| 153 | </item> | 166 | </item> |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index ea667caef..304625cd7 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -19,47 +19,6 @@ | |||
| 19 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 19 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 20 | #endif | 20 | #endif |
| 21 | 21 | ||
| 22 | namespace { | ||
| 23 | enum class Resolution : int { | ||
| 24 | Auto, | ||
| 25 | Scale1x, | ||
| 26 | Scale2x, | ||
| 27 | Scale3x, | ||
| 28 | Scale4x, | ||
| 29 | }; | ||
| 30 | |||
| 31 | float ToResolutionFactor(Resolution option) { | ||
| 32 | switch (option) { | ||
| 33 | case Resolution::Auto: | ||
| 34 | return 0.f; | ||
| 35 | case Resolution::Scale1x: | ||
| 36 | return 1.f; | ||
| 37 | case Resolution::Scale2x: | ||
| 38 | return 2.f; | ||
| 39 | case Resolution::Scale3x: | ||
| 40 | return 3.f; | ||
| 41 | case Resolution::Scale4x: | ||
| 42 | return 4.f; | ||
| 43 | } | ||
| 44 | return 0.f; | ||
| 45 | } | ||
| 46 | |||
| 47 | Resolution FromResolutionFactor(float factor) { | ||
| 48 | if (factor == 0.f) { | ||
| 49 | return Resolution::Auto; | ||
| 50 | } else if (factor == 1.f) { | ||
| 51 | return Resolution::Scale1x; | ||
| 52 | } else if (factor == 2.f) { | ||
| 53 | return Resolution::Scale2x; | ||
| 54 | } else if (factor == 3.f) { | ||
| 55 | return Resolution::Scale3x; | ||
| 56 | } else if (factor == 4.f) { | ||
| 57 | return Resolution::Scale4x; | ||
| 58 | } | ||
| 59 | return Resolution::Auto; | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | ConfigureGraphics::ConfigureGraphics(QWidget* parent) | 22 | ConfigureGraphics::ConfigureGraphics(QWidget* parent) |
| 64 | : QWidget(parent), ui(new Ui::ConfigureGraphics) { | 23 | : QWidget(parent), ui(new Ui::ConfigureGraphics) { |
| 65 | vulkan_device = Settings::values.vulkan_device; | 24 | vulkan_device = Settings::values.vulkan_device; |
| @@ -99,8 +58,6 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 99 | 58 | ||
| 100 | ui->api->setEnabled(runtime_lock); | 59 | ui->api->setEnabled(runtime_lock); |
| 101 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); | 60 | ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend)); |
| 102 | ui->resolution_factor_combobox->setCurrentIndex( | ||
| 103 | static_cast<int>(FromResolutionFactor(Settings::values.resolution_factor))); | ||
| 104 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); | 61 | ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio); |
| 105 | ui->use_disk_shader_cache->setEnabled(runtime_lock); | 62 | ui->use_disk_shader_cache->setEnabled(runtime_lock); |
| 106 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); | 63 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); |
| @@ -114,8 +71,6 @@ void ConfigureGraphics::SetConfiguration() { | |||
| 114 | void ConfigureGraphics::ApplyConfiguration() { | 71 | void ConfigureGraphics::ApplyConfiguration() { |
| 115 | Settings::values.renderer_backend = GetCurrentGraphicsBackend(); | 72 | Settings::values.renderer_backend = GetCurrentGraphicsBackend(); |
| 116 | Settings::values.vulkan_device = vulkan_device; | 73 | Settings::values.vulkan_device = vulkan_device; |
| 117 | Settings::values.resolution_factor = | ||
| 118 | ToResolutionFactor(static_cast<Resolution>(ui->resolution_factor_combobox->currentIndex())); | ||
| 119 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); | 74 | Settings::values.aspect_ratio = ui->aspect_ratio_combobox->currentIndex(); |
| 120 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); | 75 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); |
| 121 | Settings::values.use_asynchronous_gpu_emulation = | 76 | Settings::values.use_asynchronous_gpu_emulation = |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index c816d6108..6e75447a5 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -85,46 +85,6 @@ | |||
| 85 | </widget> | 85 | </widget> |
| 86 | </item> | 86 | </item> |
| 87 | <item> | 87 | <item> |
| 88 | <layout class="QHBoxLayout" name="horizontalLayout_2"> | ||
| 89 | <item> | ||
| 90 | <widget class="QLabel" name="label"> | ||
| 91 | <property name="text"> | ||
| 92 | <string>Internal Resolution:</string> | ||
| 93 | </property> | ||
| 94 | </widget> | ||
| 95 | </item> | ||
| 96 | <item> | ||
| 97 | <widget class="QComboBox" name="resolution_factor_combobox"> | ||
| 98 | <item> | ||
| 99 | <property name="text"> | ||
| 100 | <string>Auto (Window Size)</string> | ||
| 101 | </property> | ||
| 102 | </item> | ||
| 103 | <item> | ||
| 104 | <property name="text"> | ||
| 105 | <string>Native (1280x720)</string> | ||
| 106 | </property> | ||
| 107 | </item> | ||
| 108 | <item> | ||
| 109 | <property name="text"> | ||
| 110 | <string>2x Native (2560x1440)</string> | ||
| 111 | </property> | ||
| 112 | </item> | ||
| 113 | <item> | ||
| 114 | <property name="text"> | ||
| 115 | <string>3x Native (3840x2160)</string> | ||
| 116 | </property> | ||
| 117 | </item> | ||
| 118 | <item> | ||
| 119 | <property name="text"> | ||
| 120 | <string>4x Native (5120x2880)</string> | ||
| 121 | </property> | ||
| 122 | </item> | ||
| 123 | </widget> | ||
| 124 | </item> | ||
| 125 | </layout> | ||
| 126 | </item> | ||
| 127 | <item> | ||
| 128 | <layout class="QHBoxLayout" name="horizontalLayout_6"> | 88 | <layout class="QHBoxLayout" name="horizontalLayout_6"> |
| 129 | <item> | 89 | <item> |
| 130 | <widget class="QLabel" name="ar_label"> | 90 | <widget class="QLabel" name="ar_label"> |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 5bb2ae555..be5006ad3 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -22,6 +22,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 22 | ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); | 22 | ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); |
| 23 | ui->use_vsync->setEnabled(runtime_lock); | 23 | ui->use_vsync->setEnabled(runtime_lock); |
| 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); | 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); |
| 25 | ui->use_assembly_shaders->setEnabled(runtime_lock); | ||
| 26 | ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders); | ||
| 25 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); | 27 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); |
| 26 | ui->force_30fps_mode->setEnabled(runtime_lock); | 28 | ui->force_30fps_mode->setEnabled(runtime_lock); |
| 27 | ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); | 29 | ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); |
| @@ -33,6 +35,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { | |||
| 33 | auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); | 35 | auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); |
| 34 | Settings::values.gpu_accuracy = gpu_accuracy; | 36 | Settings::values.gpu_accuracy = gpu_accuracy; |
| 35 | Settings::values.use_vsync = ui->use_vsync->isChecked(); | 37 | Settings::values.use_vsync = ui->use_vsync->isChecked(); |
| 38 | Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked(); | ||
| 36 | Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); | 39 | Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); |
| 37 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); | 40 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); |
| 38 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); | 41 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 770b80c50..0021607ac 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -63,6 +63,16 @@ | |||
| 63 | </widget> | 63 | </widget> |
| 64 | </item> | 64 | </item> |
| 65 | <item> | 65 | <item> |
| 66 | <widget class="QCheckBox" name="use_assembly_shaders"> | ||
| 67 | <property name="toolTip"> | ||
| 68 | <string>Reduces shader stutter by using OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string> | ||
| 69 | </property> | ||
| 70 | <property name="text"> | ||
| 71 | <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string> | ||
| 72 | </property> | ||
| 73 | </widget> | ||
| 74 | </item> | ||
| 75 | <item> | ||
| 66 | <widget class="QCheckBox" name="force_30fps_mode"> | 76 | <widget class="QCheckBox" name="force_30fps_mode"> |
| 67 | <property name="text"> | 77 | <property name="text"> |
| 68 | <string>Force 30 FPS mode</string> | 78 | <string>Force 30 FPS mode</string> |
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp index e4eb5594b..a05fa64ba 100644 --- a/src/yuzu/configuration/configure_input_player.cpp +++ b/src/yuzu/configuration/configure_input_player.cpp | |||
| @@ -480,7 +480,9 @@ void ConfigureInputPlayer::RestoreDefaults() { | |||
| 480 | SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]); | 480 | SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]); |
| 481 | } | 481 | } |
| 482 | } | 482 | } |
| 483 | |||
| 483 | UpdateButtonLabels(); | 484 | UpdateButtonLabels(); |
| 485 | ApplyConfiguration(); | ||
| 484 | } | 486 | } |
| 485 | 487 | ||
| 486 | void ConfigureInputPlayer::ClearAll() { | 488 | void ConfigureInputPlayer::ClearAll() { |
| @@ -505,6 +507,7 @@ void ConfigureInputPlayer::ClearAll() { | |||
| 505 | } | 507 | } |
| 506 | 508 | ||
| 507 | UpdateButtonLabels(); | 509 | UpdateButtonLabels(); |
| 510 | ApplyConfiguration(); | ||
| 508 | } | 511 | } |
| 509 | 512 | ||
| 510 | void ConfigureInputPlayer::UpdateButtonLabels() { | 513 | void ConfigureInputPlayer::UpdateButtonLabels() { |
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp index ea0079353..a93733b26 100644 --- a/src/yuzu/discord_impl.cpp +++ b/src/yuzu/discord_impl.cpp | |||
| @@ -18,7 +18,7 @@ DiscordImpl::DiscordImpl() { | |||
| 18 | 18 | ||
| 19 | // The number is the client ID for yuzu, it's used for images and the | 19 | // The number is the client ID for yuzu, it's used for images and the |
| 20 | // application name | 20 | // application name |
| 21 | Discord_Initialize("471872241299226636", &handlers, 1, nullptr); | 21 | Discord_Initialize("712465656758665259", &handlers, 1, nullptr); |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | DiscordImpl::~DiscordImpl() { | 24 | DiscordImpl::~DiscordImpl() { |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 0b291c7d0..4119d7907 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -65,6 +65,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual | |||
| 65 | #include "common/logging/backend.h" | 65 | #include "common/logging/backend.h" |
| 66 | #include "common/logging/filter.h" | 66 | #include "common/logging/filter.h" |
| 67 | #include "common/logging/log.h" | 67 | #include "common/logging/log.h" |
| 68 | #include "common/memory_detect.h" | ||
| 68 | #include "common/microprofile.h" | 69 | #include "common/microprofile.h" |
| 69 | #include "common/scm_rev.h" | 70 | #include "common/scm_rev.h" |
| 70 | #include "common/scope_exit.h" | 71 | #include "common/scope_exit.h" |
| @@ -219,6 +220,10 @@ GMainWindow::GMainWindow() | |||
| 219 | LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); | 220 | LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); |
| 220 | #endif | 221 | #endif |
| 221 | LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); | 222 | LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); |
| 223 | LOG_INFO(Frontend, "Host RAM: {:.2f} GB", | ||
| 224 | Common::GetMemInfo().TotalPhysicalMemory / 1024.0f / 1024 / 1024); | ||
| 225 | LOG_INFO(Frontend, "Host Swap: {:.2f} GB", | ||
| 226 | Common::GetMemInfo().TotalSwapMemory / 1024.0f / 1024 / 1024); | ||
| 222 | UpdateWindowTitle(); | 227 | UpdateWindowTitle(); |
| 223 | 228 | ||
| 224 | show(); | 229 | show(); |
| @@ -684,10 +689,7 @@ void GMainWindow::InitializeHotkeys() { | |||
| 684 | Settings::values.use_frame_limit = !Settings::values.use_frame_limit; | 689 | Settings::values.use_frame_limit = !Settings::values.use_frame_limit; |
| 685 | UpdateStatusBar(); | 690 | UpdateStatusBar(); |
| 686 | }); | 691 | }); |
| 687 | // TODO: Remove this comment/static whenever the next major release of | 692 | constexpr u16 SPEED_LIMIT_STEP = 5; |
| 688 | // MSVC occurs and we make it a requirement (see: | ||
| 689 | // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html) | ||
| 690 | static constexpr u16 SPEED_LIMIT_STEP = 5; | ||
| 691 | connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), | 693 | connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Increase Speed Limit"), this), |
| 692 | &QShortcut::activated, this, [&] { | 694 | &QShortcut::activated, this, [&] { |
| 693 | if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { | 695 | if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { |
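The deleted comment referenced an old MSVC defect where a `constexpr` local named inside a lambda was wrongly treated as a capture; with compilers that implement the rule correctly, no `static` workaround is needed, because reading a `constexpr` variable is not an odr-use. A small self-contained demonstration:

```cpp
#include <cstdio>

int main() {
    constexpr int SPEED_LIMIT_STEP = 5;
    // Even a capture-less lambda may read the constant: the use is folded to
    // its value at compile time, so nothing needs to be captured.
    const auto bump = [](int limit) { return limit + SPEED_LIMIT_STEP; };
    std::printf("%d\n", bump(100)); // prints 105
    return 0;
}
```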
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 2348e6e0d..659b9f701 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -380,8 +380,6 @@ void Config::ReadValues() { | |||
| 380 | Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); | 380 | Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "debug", false); |
| 381 | Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); | 381 | Settings::values.vulkan_device = sdl2_config->GetInteger("Renderer", "vulkan_device", 0); |
| 382 | 382 | ||
| 383 | Settings::values.resolution_factor = | ||
| 384 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | ||
| 385 | Settings::values.aspect_ratio = | 383 | Settings::values.aspect_ratio = |
| 386 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | 384 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); |
| 387 | Settings::values.max_anisotropy = | 385 | Settings::values.max_anisotropy = |
| @@ -397,6 +395,8 @@ void Config::ReadValues() { | |||
| 397 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 395 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 398 | Settings::values.use_vsync = | 396 | Settings::values.use_vsync = |
| 399 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); | 397 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); |
| 398 | Settings::values.use_assembly_shaders = | ||
| 399 | sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false); | ||
| 400 | Settings::values.use_fast_gpu_time = | 400 | Settings::values.use_fast_gpu_time = |
| 401 | sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); | 401 | sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); |
| 402 | 402 | ||
| @@ -430,6 +430,8 @@ void Config::ReadValues() { | |||
| 430 | Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); | 430 | Settings::values.quest_flag = sdl2_config->GetBoolean("Debugging", "quest_flag", false); |
| 431 | Settings::values.disable_cpu_opt = | 431 | Settings::values.disable_cpu_opt = |
| 432 | sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false); | 432 | sdl2_config->GetBoolean("Debugging", "disable_cpu_opt", false); |
| 433 | Settings::values.disable_macro_jit = | ||
| 434 | sdl2_config->GetBoolean("Debugging", "disable_macro_jit", false); | ||
| 433 | 435 | ||
| 434 | const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); | 436 | const auto title_list = sdl2_config->Get("AddOns", "title_ids", ""); |
| 435 | std::stringstream ss(title_list); | 437 | std::stringstream ss(title_list); |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index ae94b51c4..45c07ed5d 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -117,11 +117,6 @@ use_hw_renderer = | |||
| 117 | # 0: Interpreter (slow), 1 (default): JIT (fast) | 117 | # 0: Interpreter (slow), 1 (default): JIT (fast) |
| 118 | use_shader_jit = | 118 | use_shader_jit = |
| 119 | 119 | ||
| 120 | # Resolution scale factor | ||
| 121 | # 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale | ||
| 122 | # factor for the Switch resolution | ||
| 123 | resolution_factor = | ||
| 124 | |||
| 125 | # Aspect ratio | 120 | # Aspect ratio |
| 126 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | 121 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window |
| 127 | aspect_ratio = | 122 | aspect_ratio = |
| @@ -134,6 +129,10 @@ max_anisotropy = | |||
| 134 | # 0 (default): Off, 1: On | 129 | # 0 (default): Off, 1: On |
| 135 | use_vsync = | 130 | use_vsync = |
| 136 | 131 | ||
| 132 | # Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required. | ||
| 133 | # 0 (default): Off, 1: On | ||
| 134 | use_assembly_shaders = | ||
| 135 | |||
| 137 | # Turns on the frame limiter, which will limit frames output to the target game speed | 136 | # Turns on the frame limiter, which will limit frames output to the target game speed |
| 138 | # 0: Off, 1: On (default) | 137 | # 0: Off, 1: On (default) |
| 139 | use_frame_limit = | 138 | use_frame_limit = |
| @@ -287,6 +286,8 @@ quest_flag = | |||
| 287 | # Determines whether or not JIT CPU optimizations are enabled | 286 | # Determines whether or not JIT CPU optimizations are enabled |
| 288 | # false: Optimizations Enabled, true: Optimizations Disabled | 287 | # false: Optimizations Enabled, true: Optimizations Disabled |
| 289 | disable_cpu_opt = | 288 | disable_cpu_opt = |
| 289 | # Determines whether or not the macro JIT compiler is enabled. false (default): Enabled, true: Disabled | ||
| 290 | disable_macro_jit = | ||
| 290 | 291 | ||
| 291 | [WebService] | 292 | [WebService] |
| 292 | # Whether or not to enable telemetry | 293 | # Whether or not to enable telemetry |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 411e7e647..09cc0a3b5 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | |||
| @@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen) | |||
| 98 | SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); | 98 | SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); |
| 99 | SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); | 99 | SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); |
| 100 | SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); | 100 | SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); |
| 101 | if (Settings::values.renderer_debug) { | ||
| 102 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); | ||
| 103 | } | ||
| 101 | SDL_GL_SetSwapInterval(0); | 104 | SDL_GL_SetSwapInterval(0); |
| 102 | 105 | ||
| 103 | std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, | 106 | std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, |
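Once SDL creates the context with `SDL_GL_CONTEXT_DEBUG_FLAG`, a GL 4.3 debug callback can be installed to surface driver messages. A hedged sketch, assuming a function loader such as glad has already been initialized:

```cpp
#include <cstdio>
#include <glad/glad.h> // assumption: any OpenGL 4.3 function loader works

static void GLAPIENTRY OnGLMessage(GLenum source, GLenum type, GLuint id,
                                   GLenum severity, GLsizei length,
                                   const GLchar* message, const void* user_param) {
    std::fprintf(stderr, "GL debug: %s\n", message);
}

// Call once with the debug context current; synchronous output makes it easy
// to breakpoint inside the callback at the exact offending GL call.
void EnableGLDebugOutput() {
    glEnable(GL_DEBUG_OUTPUT);
    glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
    glDebugMessageCallback(OnGLMessage, nullptr);
}
```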
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index 3be58b15d..1566c2e3f 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -116,8 +116,6 @@ void Config::ReadValues() { | |||
| 116 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); | 116 | Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); |
| 117 | 117 | ||
| 118 | // Renderer | 118 | // Renderer |
| 119 | Settings::values.resolution_factor = | ||
| 120 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); | ||
| 121 | Settings::values.aspect_ratio = | 119 | Settings::values.aspect_ratio = |
| 122 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); | 120 | static_cast<int>(sdl2_config->GetInteger("Renderer", "aspect_ratio", 0)); |
| 123 | Settings::values.max_anisotropy = | 121 | Settings::values.max_anisotropy = |
diff --git a/src/yuzu_tester/default_ini.h b/src/yuzu_tester/default_ini.h index ca203b64d..41bbbbf60 100644 --- a/src/yuzu_tester/default_ini.h +++ b/src/yuzu_tester/default_ini.h | |||
| @@ -21,11 +21,6 @@ use_hw_renderer = | |||
| 21 | # 0: Interpreter (slow), 1 (default): JIT (fast) | 21 | # 0: Interpreter (slow), 1 (default): JIT (fast) |
| 22 | use_shader_jit = | 22 | use_shader_jit = |
| 23 | 23 | ||
| 24 | # Resolution scale factor | ||
| 25 | # 0: Auto (scales resolution to window size), 1: Native Switch screen resolution, Otherwise a scale | ||
| 26 | # factor for the Switch resolution | ||
| 27 | resolution_factor = | ||
| 28 | |||
| 29 | # Aspect ratio | 24 | # Aspect ratio |
| 30 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window | 25 | # 0: Default (16:9), 1: Force 4:3, 2: Force 21:9, 3: Stretch to Window |
| 31 | aspect_ratio = | 26 | aspect_ratio = |