diff options
34 files changed, 758 insertions, 258 deletions
diff --git a/.gitmodules b/.gitmodules index bf3b80d59..2ec9dda62 100644 --- a/.gitmodules +++ b/.gitmodules | |||
| @@ -28,3 +28,6 @@ | |||
| 28 | [submodule "libzip"] | 28 | [submodule "libzip"] |
| 29 | path = externals/libzip/libzip | 29 | path = externals/libzip/libzip |
| 30 | url = https://github.com/nih-at/libzip.git | 30 | url = https://github.com/nih-at/libzip.git |
| 31 | [submodule "xbyak"] | ||
| 32 | path = externals/xbyak | ||
| 33 | url = https://github.com/herumi/xbyak.git | ||
diff --git a/CMakeLists.txt b/CMakeLists.txt index 61321bf0a..a9f669d56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | cmake_minimum_required(VERSION 3.11) | 1 | cmake_minimum_required(VERSION 3.15) |
| 2 | 2 | ||
| 3 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") | 3 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") |
| 4 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") | 4 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") |
| @@ -13,7 +13,7 @@ project(yuzu) | |||
| 13 | option(ENABLE_SDL2 "Enable the SDL2 frontend" ON) | 13 | option(ENABLE_SDL2 "Enable the SDL2 frontend" ON) |
| 14 | 14 | ||
| 15 | option(ENABLE_QT "Enable the Qt frontend" ON) | 15 | option(ENABLE_QT "Enable the Qt frontend" ON) |
| 16 | CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF) | 16 | CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF) |
| 17 | 17 | ||
| 18 | option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON) | 18 | option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON) |
| 19 | 19 | ||
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 0b40cd1b0..df7a5e0a9 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt | |||
| @@ -75,3 +75,11 @@ if (ENABLE_WEB_SERVICE) | |||
| 75 | target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) | 75 | target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) |
| 76 | target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto) | 76 | target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto) |
| 77 | endif() | 77 | endif() |
| 78 | |||
| 79 | if (NOT TARGET xbyak) | ||
| 80 | if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) | ||
| 81 | add_library(xbyak INTERFACE) | ||
| 82 | target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak) | ||
| 83 | target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES) | ||
| 84 | endif() | ||
| 85 | endif() | ||
diff --git a/externals/xbyak b/externals/xbyak new file mode 160000 | |||
| Subproject 82b70e665918efc2ee348091742fd0237b3b68c | |||
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 264dff546..24b7a083c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -171,10 +171,12 @@ if(ARCHITECTURE_x86_64) | |||
| 171 | PRIVATE | 171 | PRIVATE |
| 172 | x64/cpu_detect.cpp | 172 | x64/cpu_detect.cpp |
| 173 | x64/cpu_detect.h | 173 | x64/cpu_detect.h |
| 174 | x64/xbyak_abi.h | ||
| 175 | x64/xbyak_util.h | ||
| 174 | ) | 176 | ) |
| 175 | endif() | 177 | endif() |
| 176 | 178 | ||
| 177 | create_target_directory_groups(common) | 179 | create_target_directory_groups(common) |
| 178 | 180 | ||
| 179 | target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) | 181 | target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) |
| 180 | target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd) | 182 | target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) |
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h new file mode 100644 index 000000000..794da8a52 --- /dev/null +++ b/src/common/x64/xbyak_abi.h | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | #include <initializer_list> | ||
| 9 | #include <xbyak.h> | ||
| 10 | #include "common/assert.h" | ||
| 11 | |||
| 12 | namespace Common::X64 { | ||
| 13 | |||
| 14 | inline int RegToIndex(const Xbyak::Reg& reg) { | ||
| 15 | using Kind = Xbyak::Reg::Kind; | ||
| 16 | ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, | ||
| 17 | "RegSet only support GPRs and XMM registers."); | ||
| 18 | ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15."); | ||
| 19 | return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); | ||
| 20 | } | ||
| 21 | |||
| 22 | inline Xbyak::Reg64 IndexToReg64(int reg_index) { | ||
| 23 | ASSERT(reg_index < 16); | ||
| 24 | return Xbyak::Reg64(reg_index); | ||
| 25 | } | ||
| 26 | |||
| 27 | inline Xbyak::Xmm IndexToXmm(int reg_index) { | ||
| 28 | ASSERT(reg_index >= 16 && reg_index < 32); | ||
| 29 | return Xbyak::Xmm(reg_index - 16); | ||
| 30 | } | ||
| 31 | |||
| 32 | inline Xbyak::Reg IndexToReg(int reg_index) { | ||
| 33 | if (reg_index < 16) { | ||
| 34 | return IndexToReg64(reg_index); | ||
| 35 | } else { | ||
| 36 | return IndexToXmm(reg_index); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { | ||
| 41 | std::bitset<32> bits; | ||
| 42 | for (const Xbyak::Reg& reg : regs) { | ||
| 43 | bits[RegToIndex(reg)] = true; | ||
| 44 | } | ||
| 45 | return bits; | ||
| 46 | } | ||
| 47 | |||
// Masks over the 32-slot register set: the low 16 bits select the GPR slots,
// the high 16 bits select the XMM slots.
const std::bitset<32> ABI_ALL_GPRS{0x0000FFFFULL};
const std::bitset<32> ABI_ALL_XMMS{0xFFFF0000ULL};
| 50 | |||
| 51 | #ifdef _WIN32 | ||
| 52 | |||
| 53 | // Microsoft x64 ABI | ||
| 54 | const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; | ||
| 55 | const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; | ||
| 56 | const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; | ||
| 57 | const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; | ||
| 58 | const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; | ||
| 59 | |||
| 60 | const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ | ||
| 61 | // GPRs | ||
| 62 | Xbyak::util::rcx, | ||
| 63 | Xbyak::util::rdx, | ||
| 64 | Xbyak::util::r8, | ||
| 65 | Xbyak::util::r9, | ||
| 66 | Xbyak::util::r10, | ||
| 67 | Xbyak::util::r11, | ||
| 68 | // XMMs | ||
| 69 | Xbyak::util::xmm0, | ||
| 70 | Xbyak::util::xmm1, | ||
| 71 | Xbyak::util::xmm2, | ||
| 72 | Xbyak::util::xmm3, | ||
| 73 | Xbyak::util::xmm4, | ||
| 74 | Xbyak::util::xmm5, | ||
| 75 | }); | ||
| 76 | |||
| 77 | const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ | ||
| 78 | // GPRs | ||
| 79 | Xbyak::util::rbx, | ||
| 80 | Xbyak::util::rsi, | ||
| 81 | Xbyak::util::rdi, | ||
| 82 | Xbyak::util::rbp, | ||
| 83 | Xbyak::util::r12, | ||
| 84 | Xbyak::util::r13, | ||
| 85 | Xbyak::util::r14, | ||
| 86 | Xbyak::util::r15, | ||
| 87 | // XMMs | ||
| 88 | Xbyak::util::xmm6, | ||
| 89 | Xbyak::util::xmm7, | ||
| 90 | Xbyak::util::xmm8, | ||
| 91 | Xbyak::util::xmm9, | ||
| 92 | Xbyak::util::xmm10, | ||
| 93 | Xbyak::util::xmm11, | ||
| 94 | Xbyak::util::xmm12, | ||
| 95 | Xbyak::util::xmm13, | ||
| 96 | Xbyak::util::xmm14, | ||
| 97 | Xbyak::util::xmm15, | ||
| 98 | }); | ||
| 99 | |||
| 100 | constexpr size_t ABI_SHADOW_SPACE = 0x20; | ||
| 101 | |||
| 102 | #else | ||
| 103 | |||
| 104 | // System V x86-64 ABI | ||
| 105 | const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; | ||
| 106 | const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; | ||
| 107 | const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; | ||
| 108 | const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; | ||
| 109 | const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; | ||
| 110 | |||
| 111 | const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ | ||
| 112 | // GPRs | ||
| 113 | Xbyak::util::rcx, | ||
| 114 | Xbyak::util::rdx, | ||
| 115 | Xbyak::util::rdi, | ||
| 116 | Xbyak::util::rsi, | ||
| 117 | Xbyak::util::r8, | ||
| 118 | Xbyak::util::r9, | ||
| 119 | Xbyak::util::r10, | ||
| 120 | Xbyak::util::r11, | ||
| 121 | // XMMs | ||
| 122 | Xbyak::util::xmm0, | ||
| 123 | Xbyak::util::xmm1, | ||
| 124 | Xbyak::util::xmm2, | ||
| 125 | Xbyak::util::xmm3, | ||
| 126 | Xbyak::util::xmm4, | ||
| 127 | Xbyak::util::xmm5, | ||
| 128 | Xbyak::util::xmm6, | ||
| 129 | Xbyak::util::xmm7, | ||
| 130 | Xbyak::util::xmm8, | ||
| 131 | Xbyak::util::xmm9, | ||
| 132 | Xbyak::util::xmm10, | ||
| 133 | Xbyak::util::xmm11, | ||
| 134 | Xbyak::util::xmm12, | ||
| 135 | Xbyak::util::xmm13, | ||
| 136 | Xbyak::util::xmm14, | ||
| 137 | Xbyak::util::xmm15, | ||
| 138 | }); | ||
| 139 | |||
| 140 | const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ | ||
| 141 | // GPRs | ||
| 142 | Xbyak::util::rbx, | ||
| 143 | Xbyak::util::rbp, | ||
| 144 | Xbyak::util::r12, | ||
| 145 | Xbyak::util::r13, | ||
| 146 | Xbyak::util::r14, | ||
| 147 | Xbyak::util::r15, | ||
| 148 | }); | ||
| 149 | |||
| 150 | constexpr size_t ABI_SHADOW_SPACE = 0; | ||
| 151 | |||
| 152 | #endif | ||
| 153 | |||
| 154 | inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | ||
| 155 | size_t needed_frame_size, s32* out_subtraction, | ||
| 156 | s32* out_xmm_offset) { | ||
| 157 | const auto count = (regs & ABI_ALL_GPRS).count(); | ||
| 158 | rsp_alignment -= count * 8; | ||
| 159 | size_t subtraction = 0; | ||
| 160 | const auto xmm_count = (regs & ABI_ALL_XMMS).count(); | ||
| 161 | if (xmm_count) { | ||
| 162 | // If we have any XMMs to save, we must align the stack here. | ||
| 163 | subtraction = rsp_alignment & 0xF; | ||
| 164 | } | ||
| 165 | subtraction += 0x10 * xmm_count; | ||
| 166 | size_t xmm_base_subtraction = subtraction; | ||
| 167 | subtraction += needed_frame_size; | ||
| 168 | subtraction += ABI_SHADOW_SPACE; | ||
| 169 | // Final alignment. | ||
| 170 | rsp_alignment -= subtraction; | ||
| 171 | subtraction += rsp_alignment & 0xF; | ||
| 172 | |||
| 173 | *out_subtraction = (s32)subtraction; | ||
| 174 | *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); | ||
| 175 | } | ||
| 176 | |||
| 177 | inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 178 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 179 | s32 subtraction, xmm_offset; | ||
| 180 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 181 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 182 | if (regs[i] && ABI_ALL_GPRS[i]) { | ||
| 183 | code.push(IndexToReg64(static_cast<int>(i))); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | if (subtraction != 0) { | ||
| 187 | code.sub(code.rsp, subtraction); | ||
| 188 | } | ||
| 189 | |||
| 190 | for (int i = 0; i < regs.count(); i++) { | ||
| 191 | if (regs.test(i) & ABI_ALL_GPRS.test(i)) { | ||
| 192 | code.push(IndexToReg64(i)); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 197 | if (regs[i] && ABI_ALL_XMMS[i]) { | ||
| 198 | code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); | ||
| 199 | xmm_offset += 0x10; | ||
| 200 | } | ||
| 201 | } | ||
| 202 | |||
| 203 | return ABI_SHADOW_SPACE; | ||
| 204 | } | ||
| 205 | |||
| 206 | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 207 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 208 | s32 subtraction, xmm_offset; | ||
| 209 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 210 | |||
| 211 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 212 | if (regs[i] && ABI_ALL_XMMS[i]) { | ||
| 213 | code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); | ||
| 214 | xmm_offset += 0x10; | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | if (subtraction != 0) { | ||
| 219 | code.add(code.rsp, subtraction); | ||
| 220 | } | ||
| 221 | |||
| 222 | // GPRs need to be popped in reverse order | ||
| 223 | for (int i = 15; i >= 0; i--) { | ||
| 224 | if (regs[i]) { | ||
| 225 | code.pop(IndexToReg64(i)); | ||
| 226 | } | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
| 230 | inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 231 | size_t rsp_alignment, | ||
| 232 | size_t needed_frame_size = 0) { | ||
| 233 | s32 subtraction, xmm_offset; | ||
| 234 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 235 | |||
| 236 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 237 | if (regs[i] && ABI_ALL_GPRS[i]) { | ||
| 238 | code.push(IndexToReg64(static_cast<int>(i))); | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | if (subtraction != 0) { | ||
| 243 | code.sub(code.rsp, subtraction); | ||
| 244 | } | ||
| 245 | |||
| 246 | return ABI_SHADOW_SPACE; | ||
| 247 | } | ||
| 248 | |||
| 249 | inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 250 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 251 | s32 subtraction, xmm_offset; | ||
| 252 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 253 | |||
| 254 | if (subtraction != 0) { | ||
| 255 | code.add(code.rsp, subtraction); | ||
| 256 | } | ||
| 257 | |||
| 258 | // GPRs need to be popped in reverse order | ||
| 259 | for (int i = 15; i >= 0; i--) { | ||
| 260 | if (regs[i]) { | ||
| 261 | code.pop(IndexToReg64(i)); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | } // namespace Common::X64 | ||
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h new file mode 100644 index 000000000..df17f8cbe --- /dev/null +++ b/src/common/x64/xbyak_util.h | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <type_traits> | ||
| 8 | #include <xbyak.h> | ||
| 9 | #include "common/x64/xbyak_abi.h" | ||
| 10 | |||
| 11 | namespace Common::X64 { | ||
| 12 | |||
// Comparison predicate immediates for use with cmpps/cmpss.
enum {
    CMP_EQ = 0,    // equal
    CMP_LT = 1,    // less than
    CMP_LE = 2,    // less than or equal
    CMP_UNORD = 3, // unordered
    CMP_NEQ = 4,   // not equal
    CMP_NLT = 5,   // not less than
    CMP_NLE = 6,   // not less than or equal
    CMP_ORD = 7,   // ordered
};
| 24 | |||
| 25 | constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) { | ||
| 26 | const u64 distance = target - (ref + 5); | ||
| 27 | return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL); | ||
| 28 | } | ||
| 29 | |||
| 30 | inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) { | ||
| 31 | return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target); | ||
| 32 | } | ||
| 33 | |||
| 34 | template <typename T> | ||
| 35 | inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) { | ||
| 36 | static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer."); | ||
| 37 | size_t addr = reinterpret_cast<size_t>(f); | ||
| 38 | if (IsWithin2G(code, addr)) { | ||
| 39 | code.call(f); | ||
| 40 | } else { | ||
| 41 | // ABI_RETURN is a safe temp register to use before a call | ||
| 42 | code.mov(ABI_RETURN, addr); | ||
| 43 | code.call(ABI_RETURN); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | } // namespace Common::X64 | ||
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index c84cb1483..72a050de2 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp | |||
| @@ -161,7 +161,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) { | |||
| 161 | {40, nullptr, "AcquireXpadIdEventHandle"}, | 161 | {40, nullptr, "AcquireXpadIdEventHandle"}, |
| 162 | {41, nullptr, "ReleaseXpadIdEventHandle"}, | 162 | {41, nullptr, "ReleaseXpadIdEventHandle"}, |
| 163 | {51, &Hid::ActivateXpad, "ActivateXpad"}, | 163 | {51, &Hid::ActivateXpad, "ActivateXpad"}, |
| 164 | {55, nullptr, "GetXpadIds"}, | 164 | {55, &Hid::GetXpadIDs, "GetXpadIds"}, |
| 165 | {56, nullptr, "ActivateJoyXpad"}, | 165 | {56, nullptr, "ActivateJoyXpad"}, |
| 166 | {58, nullptr, "GetJoyXpadLifoHandle"}, | 166 | {58, nullptr, "GetJoyXpadLifoHandle"}, |
| 167 | {59, nullptr, "GetJoyXpadIds"}, | 167 | {59, nullptr, "GetJoyXpadIds"}, |
| @@ -319,6 +319,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) { | |||
| 319 | rb.Push(RESULT_SUCCESS); | 319 | rb.Push(RESULT_SUCCESS); |
| 320 | } | 320 | } |
| 321 | 321 | ||
| 322 | void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) { | ||
| 323 | IPC::RequestParser rp{ctx}; | ||
| 324 | const auto applet_resource_user_id{rp.Pop<u64>()}; | ||
| 325 | |||
| 326 | LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id); | ||
| 327 | |||
| 328 | IPC::ResponseBuilder rb{ctx, 3}; | ||
| 329 | rb.Push(RESULT_SUCCESS); | ||
| 330 | rb.Push(0); | ||
| 331 | } | ||
| 332 | |||
| 322 | void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) { | 333 | void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) { |
| 323 | IPC::RequestParser rp{ctx}; | 334 | IPC::RequestParser rp{ctx}; |
| 324 | const auto applet_resource_user_id{rp.Pop<u64>()}; | 335 | const auto applet_resource_user_id{rp.Pop<u64>()}; |
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h index c8ed4ad8b..d481a75f8 100644 --- a/src/core/hle/service/hid/hid.h +++ b/src/core/hle/service/hid/hid.h | |||
| @@ -86,6 +86,7 @@ public: | |||
| 86 | private: | 86 | private: |
| 87 | void CreateAppletResource(Kernel::HLERequestContext& ctx); | 87 | void CreateAppletResource(Kernel::HLERequestContext& ctx); |
| 88 | void ActivateXpad(Kernel::HLERequestContext& ctx); | 88 | void ActivateXpad(Kernel::HLERequestContext& ctx); |
| 89 | void GetXpadIDs(Kernel::HLERequestContext& ctx); | ||
| 89 | void ActivateDebugPad(Kernel::HLERequestContext& ctx); | 90 | void ActivateDebugPad(Kernel::HLERequestContext& ctx); |
| 90 | void ActivateTouchScreen(Kernel::HLERequestContext& ctx); | 91 | void ActivateTouchScreen(Kernel::HLERequestContext& ctx); |
| 91 | void ActivateMouse(Kernel::HLERequestContext& ctx); | 92 | void ActivateMouse(Kernel::HLERequestContext& ctx); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f00c71dae..d6ee82836 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -229,7 +229,7 @@ endif() | |||
| 229 | create_target_directory_groups(video_core) | 229 | create_target_directory_groups(video_core) |
| 230 | 230 | ||
| 231 | target_link_libraries(video_core PUBLIC common core) | 231 | target_link_libraries(video_core PUBLIC common core) |
| 232 | target_link_libraries(video_core PRIVATE glad) | 232 | target_link_libraries(video_core PRIVATE glad xbyak) |
| 233 | 233 | ||
| 234 | if (ENABLE_VULKAN) | 234 | if (ENABLE_VULKAN) |
| 235 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) | 235 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d9a4a1b4d..b88fce2cd 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -56,24 +56,28 @@ public: | |||
| 56 | if (use_fast_cbuf || size < max_stream_size) { | 56 | if (use_fast_cbuf || size < max_stream_size) { |
| 57 | if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { | 57 | if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { |
| 58 | auto& memory_manager = system.GPU().MemoryManager(); | 58 | auto& memory_manager = system.GPU().MemoryManager(); |
| 59 | const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); | ||
| 59 | if (use_fast_cbuf) { | 60 | if (use_fast_cbuf) { |
| 60 | if (memory_manager.IsGranularRange(gpu_addr, size)) { | 61 | u8* dest; |
| 61 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 62 | if (is_granular) { |
| 62 | return ConstBufferUpload(host_ptr, size); | 63 | dest = memory_manager.GetPointer(gpu_addr); |
| 63 | } else { | 64 | } else { |
| 64 | staging_buffer.resize(size); | 65 | staging_buffer.resize(size); |
| 65 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | 66 | dest = staging_buffer.data(); |
| 66 | return ConstBufferUpload(staging_buffer.data(), size); | 67 | memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); |
| 67 | } | 68 | } |
| 69 | return ConstBufferUpload(dest, size); | ||
| 70 | } | ||
| 71 | if (is_granular) { | ||
| 72 | u8* const host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 73 | return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { | ||
| 74 | std::memcpy(dest, host_ptr, size); | ||
| 75 | }); | ||
| 68 | } else { | 76 | } else { |
| 69 | if (memory_manager.IsGranularRange(gpu_addr, size)) { | 77 | return StreamBufferUpload( |
| 70 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 78 | size, alignment, [&memory_manager, gpu_addr, size](u8* dest) { |
| 71 | return StreamBufferUpload(host_ptr, size, alignment); | 79 | memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); |
| 72 | } else { | 80 | }); |
| 73 | staging_buffer.resize(size); | ||
| 74 | memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | ||
| 75 | return StreamBufferUpload(staging_buffer.data(), size, alignment); | ||
| 76 | } | ||
| 77 | } | 81 | } |
| 78 | } | 82 | } |
| 79 | } | 83 | } |
| @@ -101,7 +105,9 @@ public: | |||
| 101 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | 105 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, |
| 102 | std::size_t alignment = 4) { | 106 | std::size_t alignment = 4) { |
| 103 | std::lock_guard lock{mutex}; | 107 | std::lock_guard lock{mutex}; |
| 104 | return StreamBufferUpload(raw_pointer, size, alignment); | 108 | return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) { |
| 109 | std::memcpy(dest, raw_pointer, size); | ||
| 110 | }); | ||
| 105 | } | 111 | } |
| 106 | 112 | ||
| 107 | void Map(std::size_t max_size) { | 113 | void Map(std::size_t max_size) { |
| @@ -424,11 +430,11 @@ private: | |||
| 424 | map->MarkAsModified(false, 0); | 430 | map->MarkAsModified(false, 0); |
| 425 | } | 431 | } |
| 426 | 432 | ||
| 427 | BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, | 433 | template <typename Callable> |
| 428 | std::size_t alignment) { | 434 | BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { |
| 429 | AlignBuffer(alignment); | 435 | AlignBuffer(alignment); |
| 430 | const std::size_t uploaded_offset = buffer_offset; | 436 | const std::size_t uploaded_offset = buffer_offset; |
| 431 | std::memcpy(buffer_ptr, raw_pointer, size); | 437 | callable(buffer_ptr); |
| 432 | 438 | ||
| 433 | buffer_ptr += size; | 439 | buffer_ptr += size; |
| 434 | buffer_offset += size; | 440 | buffer_offset += size; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 024c9e43b..13ef2e42d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -106,7 +106,11 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 106 | regs.rasterize_enable = 1; | 106 | regs.rasterize_enable = 1; |
| 107 | regs.rt_separate_frag_data = 1; | 107 | regs.rt_separate_frag_data = 1; |
| 108 | regs.framebuffer_srgb = 1; | 108 | regs.framebuffer_srgb = 1; |
| 109 | regs.line_width_aliased = 1.0f; | ||
| 110 | regs.line_width_smooth = 1.0f; | ||
| 109 | regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; | 111 | regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; |
| 112 | regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill; | ||
| 113 | regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill; | ||
| 110 | 114 | ||
| 111 | shadow_state = regs; | 115 | shadow_state = regs; |
| 112 | 116 | ||
| @@ -457,8 +461,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| 457 | 461 | ||
| 458 | void Maxwell3D::ProcessQueryGet() { | 462 | void Maxwell3D::ProcessQueryGet() { |
| 459 | // TODO(Subv): Support the other query units. | 463 | // TODO(Subv): Support the other query units. |
| 460 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | 464 | if (regs.query.query_get.unit != Regs::QueryUnit::Crop) { |
| 461 | "Units other than CROP are unimplemented"); | 465 | LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented"); |
| 466 | } | ||
| 462 | 467 | ||
| 463 | switch (regs.query.query_get.operation) { | 468 | switch (regs.query.query_get.operation) { |
| 464 | case Regs::QueryOperation::Release: | 469 | case Regs::QueryOperation::Release: |
| @@ -534,8 +539,8 @@ void Maxwell3D::ProcessCounterReset() { | |||
| 534 | rasterizer.ResetCounter(QueryType::SamplesPassed); | 539 | rasterizer.ResetCounter(QueryType::SamplesPassed); |
| 535 | break; | 540 | break; |
| 536 | default: | 541 | default: |
| 537 | LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", | 542 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", |
| 538 | static_cast<int>(regs.counter_reset)); | 543 | static_cast<int>(regs.counter_reset)); |
| 539 | break; | 544 | break; |
| 540 | } | 545 | } |
| 541 | } | 546 | } |
| @@ -592,8 +597,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { | |||
| 592 | system.GPU().GetTicks()); | 597 | system.GPU().GetTicks()); |
| 593 | return {}; | 598 | return {}; |
| 594 | default: | 599 | default: |
| 595 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | 600 | LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", |
| 596 | static_cast<u32>(regs.query.query_get.select.Value())); | 601 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 597 | return 1; | 602 | return 1; |
| 598 | } | 603 | } |
| 599 | } | 604 | } |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 466a911db..b772c37d9 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <cstring> | 8 | #include <cstring> |
| 9 | #include <limits> | ||
| 9 | #include <optional> | 10 | #include <optional> |
| 10 | #include <vector> | 11 | #include <vector> |
| 11 | 12 | ||
| @@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1; | |||
| 26 | 27 | ||
| 27 | constexpr u32 NumStages = 5; | 28 | constexpr u32 NumStages = 5; |
| 28 | 29 | ||
| 29 | constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, | 30 | constexpr std::array LimitUBOs = { |
| 30 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, | 31 | GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, |
| 31 | GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; | 32 | GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, |
| 33 | GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS}; | ||
| 32 | 34 | ||
| 33 | constexpr std::array LimitSSBOs = { | 35 | constexpr std::array LimitSSBOs = { |
| 34 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, | 36 | GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, |
| 35 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, | 37 | GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, |
| 36 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; | 38 | GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS}; |
| 37 | 39 | ||
| 38 | constexpr std::array LimitSamplers = { | 40 | constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, |
| 39 | GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, | 41 | GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, |
| 40 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, | 42 | GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, |
| 41 | GL_MAX_TEXTURE_IMAGE_UNITS}; | 43 | GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, |
| 44 | GL_MAX_TEXTURE_IMAGE_UNITS, | ||
| 45 | GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS}; | ||
| 42 | 46 | ||
| 43 | constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, | 47 | constexpr std::array LimitImages = { |
| 44 | GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, | 48 | GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, |
| 45 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, | 49 | GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, |
| 46 | GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; | 50 | GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS}; |
| 47 | 51 | ||
| 48 | template <typename T> | 52 | template <typename T> |
| 49 | T GetInteger(GLenum pname) { | 53 | T GetInteger(GLenum pname) { |
| @@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | |||
| 85 | return std::exchange(base, base + amount); | 89 | return std::exchange(base, base + amount); |
| 86 | } | 90 | } |
| 87 | 91 | ||
| 92 | std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { | ||
| 93 | std::array<u32, Tegra::Engines::MaxShaderTypes> max; | ||
| 94 | std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(), | ||
| 95 | [](GLenum pname) { return GetInteger<u32>(pname); }); | ||
| 96 | return max; | ||
| 97 | } | ||
| 98 | |||
| 88 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { | 99 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { |
| 89 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; | 100 | std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; |
| 90 | 101 | ||
| @@ -133,6 +144,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin | |||
| 133 | } | 144 | } |
| 134 | 145 | ||
| 135 | bool IsASTCSupported() { | 146 | bool IsASTCSupported() { |
| 147 | static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; | ||
| 136 | static constexpr std::array formats = { | 148 | static constexpr std::array formats = { |
| 137 | GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, | 149 | GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, |
| 138 | GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, | 150 | GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, |
| @@ -149,25 +161,35 @@ bool IsASTCSupported() { | |||
| 149 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, | 161 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, |
| 150 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, | 162 | GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, |
| 151 | }; | 163 | }; |
| 152 | return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) { | 164 | static constexpr std::array required_support = { |
| 153 | GLint supported; | 165 | GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE, |
| 154 | glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, | 166 | GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE, |
| 155 | &supported); | 167 | }; |
| 156 | return supported == GL_TRUE; | 168 | |
| 157 | }) == formats.end(); | 169 | for (const GLenum target : targets) { |
| 170 | for (const GLenum format : formats) { | ||
| 171 | for (const GLenum support : required_support) { | ||
| 172 | GLint value; | ||
| 173 | glGetInternalformativ(GL_TEXTURE_2D, format, support, 1, &value); | ||
| 174 | if (value != GL_FULL_SUPPORT) { | ||
| 175 | return false; | ||
| 176 | } | ||
| 177 | } | ||
| 178 | } | ||
| 179 | } | ||
| 180 | return true; | ||
| 158 | } | 181 | } |
| 159 | 182 | ||
| 160 | } // Anonymous namespace | 183 | } // Anonymous namespace |
| 161 | 184 | ||
| 162 | Device::Device() : base_bindings{BuildBaseBindings()} { | 185 | Device::Device() |
| 186 | : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { | ||
| 163 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); | 187 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); |
| 164 | const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); | 188 | const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); |
| 165 | const std::vector extensions = GetExtensions(); | 189 | const std::vector extensions = GetExtensions(); |
| 166 | 190 | ||
| 167 | const bool is_nvidia = vendor == "NVIDIA Corporation"; | 191 | const bool is_nvidia = vendor == "NVIDIA Corporation"; |
| 168 | const bool is_amd = vendor == "ATI Technologies Inc."; | 192 | const bool is_amd = vendor == "ATI Technologies Inc."; |
| 169 | const bool is_intel = vendor == "Intel"; | ||
| 170 | const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr; | ||
| 171 | 193 | ||
| 172 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 194 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 173 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 195 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| @@ -182,7 +204,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} { | |||
| 182 | has_variable_aoffi = TestVariableAoffi(); | 204 | has_variable_aoffi = TestVariableAoffi(); |
| 183 | has_component_indexing_bug = is_amd; | 205 | has_component_indexing_bug = is_amd; |
| 184 | has_precise_bug = TestPreciseBug(); | 206 | has_precise_bug = TestPreciseBug(); |
| 185 | has_broken_compute = is_intel_proprietary; | ||
| 186 | has_fast_buffer_sub_data = is_nvidia; | 207 | has_fast_buffer_sub_data = is_nvidia; |
| 187 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 208 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 188 | GLAD_GL_NV_compute_program5; | 209 | GLAD_GL_NV_compute_program5; |
| @@ -197,7 +218,9 @@ Device::Device() : base_bindings{BuildBaseBindings()} { | |||
| 197 | } | 218 | } |
| 198 | 219 | ||
| 199 | Device::Device(std::nullptr_t) { | 220 | Device::Device(std::nullptr_t) { |
| 200 | uniform_buffer_alignment = 0; | 221 | max_uniform_buffers.fill(std::numeric_limits<u32>::max()); |
| 222 | uniform_buffer_alignment = 4; | ||
| 223 | shader_storage_alignment = 4; | ||
| 201 | max_vertex_attributes = 16; | 224 | max_vertex_attributes = 16; |
| 202 | max_varyings = 15; | 225 | max_varyings = 15; |
| 203 | has_warp_intrinsics = true; | 226 | has_warp_intrinsics = true; |
| @@ -205,9 +228,6 @@ Device::Device(std::nullptr_t) { | |||
| 205 | has_vertex_viewport_layer = true; | 228 | has_vertex_viewport_layer = true; |
| 206 | has_image_load_formatted = true; | 229 | has_image_load_formatted = true; |
| 207 | has_variable_aoffi = true; | 230 | has_variable_aoffi = true; |
| 208 | has_component_indexing_bug = false; | ||
| 209 | has_broken_compute = false; | ||
| 210 | has_precise_bug = false; | ||
| 211 | } | 231 | } |
| 212 | 232 | ||
| 213 | bool Device::TestVariableAoffi() { | 233 | bool Device::TestVariableAoffi() { |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e915dbd86..98cca0254 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -24,6 +24,10 @@ public: | |||
| 24 | explicit Device(); | 24 | explicit Device(); |
| 25 | explicit Device(std::nullptr_t); | 25 | explicit Device(std::nullptr_t); |
| 26 | 26 | ||
| 27 | u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept { | ||
| 28 | return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; | ||
| 29 | } | ||
| 30 | |||
| 27 | const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { | 31 | const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { |
| 28 | return base_bindings[stage_index]; | 32 | return base_bindings[stage_index]; |
| 29 | } | 33 | } |
| @@ -80,10 +84,6 @@ public: | |||
| 80 | return has_precise_bug; | 84 | return has_precise_bug; |
| 81 | } | 85 | } |
| 82 | 86 | ||
| 83 | bool HasBrokenCompute() const { | ||
| 84 | return has_broken_compute; | ||
| 85 | } | ||
| 86 | |||
| 87 | bool HasFastBufferSubData() const { | 87 | bool HasFastBufferSubData() const { |
| 88 | return has_fast_buffer_sub_data; | 88 | return has_fast_buffer_sub_data; |
| 89 | } | 89 | } |
| @@ -96,7 +96,8 @@ private: | |||
| 96 | static bool TestVariableAoffi(); | 96 | static bool TestVariableAoffi(); |
| 97 | static bool TestPreciseBug(); | 97 | static bool TestPreciseBug(); |
| 98 | 98 | ||
| 99 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings; | 99 | std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; |
| 100 | std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; | ||
| 100 | std::size_t uniform_buffer_alignment{}; | 101 | std::size_t uniform_buffer_alignment{}; |
| 101 | std::size_t shader_storage_alignment{}; | 102 | std::size_t shader_storage_alignment{}; |
| 102 | u32 max_vertex_attributes{}; | 103 | u32 max_vertex_attributes{}; |
| @@ -109,7 +110,6 @@ private: | |||
| 109 | bool has_variable_aoffi{}; | 110 | bool has_variable_aoffi{}; |
| 110 | bool has_component_indexing_bug{}; | 111 | bool has_component_indexing_bug{}; |
| 111 | bool has_precise_bug{}; | 112 | bool has_precise_bug{}; |
| 112 | bool has_broken_compute{}; | ||
| 113 | bool has_fast_buffer_sub_data{}; | 113 | bool has_fast_buffer_sub_data{}; |
| 114 | bool use_assembly_shaders{}; | 114 | bool use_assembly_shaders{}; |
| 115 | }; | 115 | }; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 716d43e65..55e79aaf6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -54,6 +54,12 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 | |||
| 54 | 54 | ||
| 55 | namespace { | 55 | namespace { |
| 56 | 56 | ||
| 57 | constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18; | ||
| 58 | constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = | ||
| 59 | NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; | ||
| 60 | constexpr std::size_t TOTAL_CONST_BUFFER_BYTES = | ||
| 61 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; | ||
| 62 | |||
| 57 | constexpr std::size_t NumSupportedVertexAttributes = 16; | 63 | constexpr std::size_t NumSupportedVertexAttributes = 16; |
| 58 | 64 | ||
| 59 | template <typename Engine, typename Entry> | 65 | template <typename Engine, typename Entry> |
| @@ -104,6 +110,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||
| 104 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { | 110 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { |
| 105 | CheckExtensions(); | 111 | CheckExtensions(); |
| 106 | 112 | ||
| 113 | unified_uniform_buffer.Create(); | ||
| 114 | glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); | ||
| 115 | |||
| 107 | if (device.UseAssemblyShaders()) { | 116 | if (device.UseAssemblyShaders()) { |
| 108 | glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | 117 | glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); |
| 109 | for (const GLuint cbuf : staging_cbufs) { | 118 | for (const GLuint cbuf : staging_cbufs) { |
| @@ -655,10 +664,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 655 | } | 664 | } |
| 656 | 665 | ||
| 657 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 666 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| 658 | if (device.HasBrokenCompute()) { | ||
| 659 | return; | ||
| 660 | } | ||
| 661 | |||
| 662 | buffer_cache.Acquire(); | 667 | buffer_cache.Acquire(); |
| 663 | current_cbuf = 0; | 668 | current_cbuf = 0; |
| 664 | 669 | ||
| @@ -846,34 +851,56 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad | |||
| 846 | MICROPROFILE_SCOPE(OpenGL_UBO); | 851 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 847 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; | 852 | const auto& stages = system.GPU().Maxwell3D().state.shader_stages; |
| 848 | const auto& shader_stage = stages[stage_index]; | 853 | const auto& shader_stage = stages[stage_index]; |
| 854 | const auto& entries = shader->GetEntries(); | ||
| 855 | const bool use_unified = entries.use_unified_uniforms; | ||
| 856 | const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; | ||
| 849 | 857 | ||
| 850 | u32 binding = | 858 | const auto base_bindings = device.GetBaseBindings(stage_index); |
| 851 | device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer; | 859 | u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer; |
| 852 | for (const auto& entry : shader->GetEntries().const_buffers) { | 860 | for (const auto& entry : entries.const_buffers) { |
| 853 | const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | 861 | const u32 index = entry.GetIndex(); |
| 854 | SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); | 862 | const auto& buffer = shader_stage.const_buffers[index]; |
| 863 | SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified, | ||
| 864 | base_unified_offset + index * Maxwell::MaxConstBufferSize); | ||
| 865 | ++binding; | ||
| 866 | } | ||
| 867 | if (use_unified) { | ||
| 868 | const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer + | ||
| 869 | entries.global_memory_entries.size()); | ||
| 870 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, | ||
| 871 | base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||
| 855 | } | 872 | } |
| 856 | } | 873 | } |
| 857 | 874 | ||
| 858 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | 875 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { |
| 859 | MICROPROFILE_SCOPE(OpenGL_UBO); | 876 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 860 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 877 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 878 | const auto& entries = kernel->GetEntries(); | ||
| 879 | const bool use_unified = entries.use_unified_uniforms; | ||
| 861 | 880 | ||
| 862 | u32 binding = 0; | 881 | u32 binding = 0; |
| 863 | for (const auto& entry : kernel->GetEntries().const_buffers) { | 882 | for (const auto& entry : entries.const_buffers) { |
| 864 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | 883 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; |
| 865 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | 884 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); |
| 866 | Tegra::Engines::ConstBufferInfo buffer; | 885 | Tegra::Engines::ConstBufferInfo buffer; |
| 867 | buffer.address = config.Address(); | 886 | buffer.address = config.Address(); |
| 868 | buffer.size = config.size; | 887 | buffer.size = config.size; |
| 869 | buffer.enabled = mask[entry.GetIndex()]; | 888 | buffer.enabled = mask[entry.GetIndex()]; |
| 870 | SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); | 889 | SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry, |
| 890 | use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); | ||
| 891 | ++binding; | ||
| 892 | } | ||
| 893 | if (use_unified) { | ||
| 894 | const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size()); | ||
| 895 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0, | ||
| 896 | NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||
| 871 | } | 897 | } |
| 872 | } | 898 | } |
| 873 | 899 | ||
| 874 | void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | 900 | void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, |
| 875 | const Tegra::Engines::ConstBufferInfo& buffer, | 901 | const Tegra::Engines::ConstBufferInfo& buffer, |
| 876 | const ConstBufferEntry& entry) { | 902 | const ConstBufferEntry& entry, bool use_unified, |
| 903 | std::size_t unified_offset) { | ||
| 877 | if (!buffer.enabled) { | 904 | if (!buffer.enabled) { |
| 878 | // Set values to zero to unbind buffers | 905 | // Set values to zero to unbind buffers |
| 879 | if (device.UseAssemblyShaders()) { | 906 | if (device.UseAssemblyShaders()) { |
| @@ -889,20 +916,29 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | |||
| 889 | // UBO alignment requirements. | 916 | // UBO alignment requirements. |
| 890 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); | 917 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); |
| 891 | 918 | ||
| 892 | const auto alignment = device.GetUniformBufferAlignment(); | 919 | const bool fast_upload = !use_unified && device.HasFastBufferSubData(); |
| 893 | auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, | 920 | |
| 894 | device.HasFastBufferSubData()); | 921 | const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); |
| 895 | if (!device.UseAssemblyShaders()) { | 922 | const GPUVAddr gpu_addr = buffer.address; |
| 896 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); | 923 | auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); |
| 924 | |||
| 925 | if (device.UseAssemblyShaders()) { | ||
| 926 | UNIMPLEMENTED_IF(use_unified); | ||
| 927 | if (offset != 0) { | ||
| 928 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | ||
| 929 | glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); | ||
| 930 | cbuf = staging_cbuf; | ||
| 931 | offset = 0; | ||
| 932 | } | ||
| 933 | glBindBufferRangeNV(stage, binding, cbuf, offset, size); | ||
| 897 | return; | 934 | return; |
| 898 | } | 935 | } |
| 899 | if (offset != 0) { | 936 | |
| 900 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | 937 | if (use_unified) { |
| 901 | glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); | 938 | glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size); |
| 902 | cbuf = staging_cbuf; | 939 | } else { |
| 903 | offset = 0; | 940 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); |
| 904 | } | 941 | } |
| 905 | glBindBufferRangeNV(stage, binding, cbuf, offset, size); | ||
| 906 | } | 942 | } |
| 907 | 943 | ||
| 908 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { | 944 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { |
| @@ -1024,6 +1060,26 @@ void RasterizerOpenGL::SyncViewport() { | |||
| 1024 | const auto& regs = gpu.regs; | 1060 | const auto& regs = gpu.regs; |
| 1025 | 1061 | ||
| 1026 | const bool dirty_viewport = flags[Dirty::Viewports]; | 1062 | const bool dirty_viewport = flags[Dirty::Viewports]; |
| 1063 | const bool dirty_clip_control = flags[Dirty::ClipControl]; | ||
| 1064 | |||
| 1065 | if (dirty_clip_control || flags[Dirty::FrontFace]) { | ||
| 1066 | flags[Dirty::FrontFace] = false; | ||
| 1067 | |||
| 1068 | GLenum mode = MaxwellToGL::FrontFace(regs.front_face); | ||
| 1069 | if (regs.screen_y_control.triangle_rast_flip != 0 && | ||
| 1070 | regs.viewport_transform[0].scale_y < 0.0f) { | ||
| 1071 | switch (mode) { | ||
| 1072 | case GL_CW: | ||
| 1073 | mode = GL_CCW; | ||
| 1074 | break; | ||
| 1075 | case GL_CCW: | ||
| 1076 | mode = GL_CW; | ||
| 1077 | break; | ||
| 1078 | } | ||
| 1079 | } | ||
| 1080 | glFrontFace(mode); | ||
| 1081 | } | ||
| 1082 | |||
| 1027 | if (dirty_viewport || flags[Dirty::ClipControl]) { | 1083 | if (dirty_viewport || flags[Dirty::ClipControl]) { |
| 1028 | flags[Dirty::ClipControl] = false; | 1084 | flags[Dirty::ClipControl] = false; |
| 1029 | 1085 | ||
| @@ -1121,11 +1177,6 @@ void RasterizerOpenGL::SyncCullMode() { | |||
| 1121 | glDisable(GL_CULL_FACE); | 1177 | glDisable(GL_CULL_FACE); |
| 1122 | } | 1178 | } |
| 1123 | } | 1179 | } |
| 1124 | |||
| 1125 | if (flags[Dirty::FrontFace]) { | ||
| 1126 | flags[Dirty::FrontFace] = false; | ||
| 1127 | glFrontFace(MaxwellToGL::FrontFace(regs.front_face)); | ||
| 1128 | } | ||
| 1129 | } | 1180 | } |
| 1130 | 1181 | ||
| 1131 | void RasterizerOpenGL::SyncPrimitiveRestart() { | 1182 | void RasterizerOpenGL::SyncPrimitiveRestart() { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 87f7fe159..f5dc56a0e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -107,7 +107,8 @@ private: | |||
| 107 | 107 | ||
| 108 | /// Configures a constant buffer. | 108 | /// Configures a constant buffer. |
| 109 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 109 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| 110 | const ConstBufferEntry& entry); | 110 | const ConstBufferEntry& entry, bool use_unified, |
| 111 | std::size_t unified_offset); | ||
| 111 | 112 | ||
| 112 | /// Configures the current global memory entries to use for the draw command. | 113 | /// Configures the current global memory entries to use for the draw command. |
| 113 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); | 114 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); |
| @@ -253,6 +254,7 @@ private: | |||
| 253 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | 254 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; |
| 254 | std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; | 255 | std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; |
| 255 | std::size_t current_cbuf = 0; | 256 | std::size_t current_cbuf = 0; |
| 257 | OGLBuffer unified_uniform_buffer; | ||
| 256 | 258 | ||
| 257 | /// Number of commands queued to the OpenGL driver. Reseted on flush. | 259 | /// Number of commands queued to the OpenGL driver. Reseted on flush. |
| 258 | std::size_t num_queued_commands = 0; | 260 | std::size_t num_queued_commands = 0; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4cd0f36cf..a991ca64a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -241,8 +241,9 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 241 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 241 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 242 | params.disk_cache.SaveEntry(std::move(entry)); | 242 | params.disk_cache.SaveEntry(std::move(entry)); |
| 243 | 243 | ||
| 244 | return std::shared_ptr<CachedShader>(new CachedShader( | 244 | return std::shared_ptr<CachedShader>( |
| 245 | params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); | 245 | new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), |
| 246 | MakeEntries(params.device, ir, shader_type), std::move(program))); | ||
| 246 | } | 247 | } |
| 247 | 248 | ||
| 248 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 249 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| @@ -265,8 +266,9 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog | |||
| 265 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 266 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 266 | params.disk_cache.SaveEntry(std::move(entry)); | 267 | params.disk_cache.SaveEntry(std::move(entry)); |
| 267 | 268 | ||
| 268 | return std::shared_ptr<CachedShader>(new CachedShader( | 269 | return std::shared_ptr<CachedShader>( |
| 269 | params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); | 270 | new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), |
| 271 | MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); | ||
| 270 | } | 272 | } |
| 271 | 273 | ||
| 272 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 274 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| @@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 348 | PrecompiledShader shader; | 350 | PrecompiledShader shader; |
| 349 | shader.program = std::move(program); | 351 | shader.program = std::move(program); |
| 350 | shader.registry = std::move(registry); | 352 | shader.registry = std::move(registry); |
| 351 | shader.entries = MakeEntries(ir); | 353 | shader.entries = MakeEntries(device, ir, entry.type); |
| 352 | 354 | ||
| 353 | std::scoped_lock lock{mutex}; | 355 | std::scoped_lock lock{mutex}; |
| 354 | if (callback) { | 356 | if (callback) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 253484968..502b95973 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -61,8 +61,8 @@ struct TextureDerivates {}; | |||
| 61 | using TextureArgument = std::pair<Type, Node>; | 61 | using TextureArgument = std::pair<Type, Node>; |
| 62 | using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; | 62 | using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; |
| 63 | 63 | ||
| 64 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); |
| 65 | static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); | 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); |
| 66 | 66 | ||
| 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt |
| 68 | #define ftou floatBitsToUint | 68 | #define ftou floatBitsToUint |
| @@ -402,6 +402,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 402 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 402 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 403 | } | 403 | } |
| 404 | 404 | ||
| 405 | bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) { | ||
| 406 | const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size()); | ||
| 407 | // We waste one UBO for emulation | ||
| 408 | const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1; | ||
| 409 | return num_ubos > num_available_ubos; | ||
| 410 | } | ||
| 411 | |||
| 405 | struct GenericVaryingDescription { | 412 | struct GenericVaryingDescription { |
| 406 | std::string name; | 413 | std::string name; |
| 407 | u8 first_element = 0; | 414 | u8 first_element = 0; |
| @@ -412,8 +419,9 @@ class GLSLDecompiler final { | |||
| 412 | public: | 419 | public: |
| 413 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | 420 | explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, |
| 414 | ShaderType stage, std::string_view identifier, std::string_view suffix) | 421 | ShaderType stage, std::string_view identifier, std::string_view suffix) |
| 415 | : device{device}, ir{ir}, registry{registry}, stage{stage}, | 422 | : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier}, |
| 416 | identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { | 423 | suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{ |
| 424 | UseUnifiedUniforms(device, ir, stage)} { | ||
| 417 | if (stage != ShaderType::Compute) { | 425 | if (stage != ShaderType::Compute) { |
| 418 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | 426 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); |
| 419 | } | 427 | } |
| @@ -834,12 +842,24 @@ private: | |||
| 834 | } | 842 | } |
| 835 | 843 | ||
| 836 | void DeclareConstantBuffers() { | 844 | void DeclareConstantBuffers() { |
| 845 | if (use_unified_uniforms) { | ||
| 846 | const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer + | ||
| 847 | static_cast<u32>(ir.GetGlobalMemory().size()); | ||
| 848 | code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{", | ||
| 849 | binding); | ||
| 850 | code.AddLine(" uint cbufs[];"); | ||
| 851 | code.AddLine("}};"); | ||
| 852 | code.AddNewLine(); | ||
| 853 | return; | ||
| 854 | } | ||
| 855 | |||
| 837 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; | 856 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; |
| 838 | for (const auto& buffers : ir.GetConstantBuffers()) { | 857 | for (const auto [index, info] : ir.GetConstantBuffers()) { |
| 839 | const auto index = buffers.first; | 858 | const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4; |
| 859 | const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements; | ||
| 840 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, | 860 | code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, |
| 841 | GetConstBufferBlock(index)); | 861 | GetConstBufferBlock(index)); |
| 842 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); | 862 | code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size); |
| 843 | code.AddLine("}};"); | 863 | code.AddLine("}};"); |
| 844 | code.AddNewLine(); | 864 | code.AddNewLine(); |
| 845 | } | 865 | } |
| @@ -1038,42 +1058,51 @@ private: | |||
| 1038 | 1058 | ||
| 1039 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | 1059 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { |
| 1040 | const Node offset = cbuf->GetOffset(); | 1060 | const Node offset = cbuf->GetOffset(); |
| 1061 | const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS; | ||
| 1062 | |||
| 1041 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | 1063 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { |
| 1042 | // Direct access | 1064 | // Direct access |
| 1043 | const u32 offset_imm = immediate->GetValue(); | 1065 | const u32 offset_imm = immediate->GetValue(); |
| 1044 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | 1066 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); |
| 1045 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | 1067 | if (use_unified_uniforms) { |
| 1046 | offset_imm / (4 * 4), (offset_imm / 4) % 4), | 1068 | return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), |
| 1047 | Type::Uint}; | 1069 | Type::Uint}; |
| 1070 | } else { | ||
| 1071 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | ||
| 1072 | offset_imm / (4 * 4), (offset_imm / 4) % 4), | ||
| 1073 | Type::Uint}; | ||
| 1074 | } | ||
| 1048 | } | 1075 | } |
| 1049 | 1076 | ||
| 1050 | if (std::holds_alternative<OperationNode>(*offset)) { | 1077 | // Indirect access |
| 1051 | // Indirect access | 1078 | if (use_unified_uniforms) { |
| 1052 | const std::string final_offset = code.GenerateTemporary(); | 1079 | return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset, |
| 1053 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); | 1080 | Visit(offset).AsUint()), |
| 1081 | Type::Uint}; | ||
| 1082 | } | ||
| 1054 | 1083 | ||
| 1055 | if (!device.HasComponentIndexingBug()) { | 1084 | const std::string final_offset = code.GenerateTemporary(); |
| 1056 | return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), | 1085 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); |
| 1057 | final_offset, final_offset), | ||
| 1058 | Type::Uint}; | ||
| 1059 | } | ||
| 1060 | 1086 | ||
| 1061 | // AMD's proprietary GLSL compiler emits ill code for variable component access. | 1087 | if (!device.HasComponentIndexingBug()) { |
| 1062 | // To bypass this driver bug generate 4 ifs, one per each component. | 1088 | return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), |
| 1063 | const std::string pack = code.GenerateTemporary(); | 1089 | final_offset, final_offset), |
| 1064 | code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), | 1090 | Type::Uint}; |
| 1065 | final_offset); | ||
| 1066 | |||
| 1067 | const std::string result = code.GenerateTemporary(); | ||
| 1068 | code.AddLine("uint {};", result); | ||
| 1069 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 1070 | code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, | ||
| 1071 | pack, GetSwizzle(swizzle)); | ||
| 1072 | } | ||
| 1073 | return {result, Type::Uint}; | ||
| 1074 | } | 1091 | } |
| 1075 | 1092 | ||
| 1076 | UNREACHABLE_MSG("Unmanaged offset node type"); | 1093 | // AMD's proprietary GLSL compiler emits ill code for variable component access. |
| 1094 | // To bypass this driver bug generate 4 ifs, one per each component. | ||
| 1095 | const std::string pack = code.GenerateTemporary(); | ||
| 1096 | code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), | ||
| 1097 | final_offset); | ||
| 1098 | |||
| 1099 | const std::string result = code.GenerateTemporary(); | ||
| 1100 | code.AddLine("uint {};", result); | ||
| 1101 | for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||
| 1102 | code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack, | ||
| 1103 | GetSwizzle(swizzle)); | ||
| 1104 | } | ||
| 1105 | return {result, Type::Uint}; | ||
| 1077 | } | 1106 | } |
| 1078 | 1107 | ||
| 1079 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | 1108 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { |
| @@ -2344,7 +2373,12 @@ private: | |||
| 2344 | return {}; | 2373 | return {}; |
| 2345 | } | 2374 | } |
| 2346 | 2375 | ||
| 2347 | Expression MemoryBarrierGL(Operation) { | 2376 | Expression MemoryBarrierGroup(Operation) { |
| 2377 | code.AddLine("groupMemoryBarrier();"); | ||
| 2378 | return {}; | ||
| 2379 | } | ||
| 2380 | |||
| 2381 | Expression MemoryBarrierGlobal(Operation) { | ||
| 2348 | code.AddLine("memoryBarrier();"); | 2382 | code.AddLine("memoryBarrier();"); |
| 2349 | return {}; | 2383 | return {}; |
| 2350 | } | 2384 | } |
| @@ -2591,7 +2625,8 @@ private: | |||
| 2591 | &GLSLDecompiler::ShuffleIndexed, | 2625 | &GLSLDecompiler::ShuffleIndexed, |
| 2592 | 2626 | ||
| 2593 | &GLSLDecompiler::Barrier, | 2627 | &GLSLDecompiler::Barrier, |
| 2594 | &GLSLDecompiler::MemoryBarrierGL, | 2628 | &GLSLDecompiler::MemoryBarrierGroup, |
| 2629 | &GLSLDecompiler::MemoryBarrierGlobal, | ||
| 2595 | }; | 2630 | }; |
| 2596 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2631 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2597 | 2632 | ||
| @@ -2704,6 +2739,7 @@ private: | |||
| 2704 | const std::string_view identifier; | 2739 | const std::string_view identifier; |
| 2705 | const std::string_view suffix; | 2740 | const std::string_view suffix; |
| 2706 | const Header header; | 2741 | const Header header; |
| 2742 | const bool use_unified_uniforms; | ||
| 2707 | std::unordered_map<u8, VaryingTFB> transform_feedback; | 2743 | std::unordered_map<u8, VaryingTFB> transform_feedback; |
| 2708 | 2744 | ||
| 2709 | ShaderWriter code; | 2745 | ShaderWriter code; |
| @@ -2899,7 +2935,7 @@ void GLSLDecompiler::DecompileAST() { | |||
| 2899 | 2935 | ||
| 2900 | } // Anonymous namespace | 2936 | } // Anonymous namespace |
| 2901 | 2937 | ||
| 2902 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { | 2938 | ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) { |
| 2903 | ShaderEntries entries; | 2939 | ShaderEntries entries; |
| 2904 | for (const auto& cbuf : ir.GetConstantBuffers()) { | 2940 | for (const auto& cbuf : ir.GetConstantBuffers()) { |
| 2905 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | 2941 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), |
| @@ -2920,6 +2956,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 2920 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; | 2956 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; |
| 2921 | } | 2957 | } |
| 2922 | entries.shader_length = ir.GetLength(); | 2958 | entries.shader_length = ir.GetLength(); |
| 2959 | entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage); | ||
| 2923 | return entries; | 2960 | return entries; |
| 2924 | } | 2961 | } |
| 2925 | 2962 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e8a178764..451c9689a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -53,11 +53,13 @@ struct ShaderEntries { | |||
| 53 | std::vector<GlobalMemoryEntry> global_memory_entries; | 53 | std::vector<GlobalMemoryEntry> global_memory_entries; |
| 54 | std::vector<SamplerEntry> samplers; | 54 | std::vector<SamplerEntry> samplers; |
| 55 | std::vector<ImageEntry> images; | 55 | std::vector<ImageEntry> images; |
| 56 | u32 clip_distances{}; | ||
| 57 | std::size_t shader_length{}; | 56 | std::size_t shader_length{}; |
| 57 | u32 clip_distances{}; | ||
| 58 | bool use_unified_uniforms{}; | ||
| 58 | }; | 59 | }; |
| 59 | 60 | ||
| 60 | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); | 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 62 | Tegra::Engines::ShaderType stage); | ||
| 61 | 63 | ||
| 62 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 64 | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 63 | const VideoCommon::Shader::Registry& registry, | 65 | const VideoCommon::Shader::Registry& registry, |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 4faa8b90c..57db5a08b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -404,8 +404,7 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr | |||
| 404 | 404 | ||
| 405 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, | 405 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, |
| 406 | bool is_proxy) | 406 | bool is_proxy) |
| 407 | : VideoCommon::ViewBase(params), surface{surface}, | 407 | : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format}, |
| 408 | format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format}, | ||
| 409 | target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { | 408 | target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { |
| 410 | if (!is_proxy) { | 409 | if (!is_proxy) { |
| 411 | main_view = CreateTextureView(); | 410 | main_view = CreateTextureView(); |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6b489e6db..e7952924a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() { | |||
| 753 | bool RendererOpenGL::Init() { | 753 | bool RendererOpenGL::Init() { |
| 754 | if (GLAD_GL_KHR_debug) { | 754 | if (GLAD_GL_KHR_debug) { |
| 755 | glEnable(GL_DEBUG_OUTPUT); | 755 | glEnable(GL_DEBUG_OUTPUT); |
| 756 | if (Settings::values.renderer_debug) { | ||
| 757 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||
| 758 | } | ||
| 756 | glDebugMessageCallback(DebugHandler, nullptr); | 759 | glDebugMessageCallback(DebugHandler, nullptr); |
| 757 | } | 760 | } |
| 758 | 761 | ||
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 568744e3c..424278816 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { | |||
| 71 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); | 71 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); |
| 72 | 72 | ||
| 73 | u32 packed_front_face = PackFrontFace(regs.front_face); | 73 | u32 packed_front_face = PackFrontFace(regs.front_face); |
| 74 | if (regs.screen_y_control.triangle_rast_flip != 0 && | 74 | if (regs.screen_y_control.triangle_rast_flip != 0) { |
| 75 | regs.viewport_transform[0].scale_y > 0.0f) { | ||
| 76 | // Flip front face | 75 | // Flip front face |
| 77 | packed_front_face = 1 - packed_front_face; | 76 | packed_front_face = 1 - packed_front_face; |
| 78 | } | 77 | } |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 2871035f5..62e950d31 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -149,7 +149,7 @@ struct FormatTuple { | |||
| 149 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F | 149 | {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F |
| 150 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U | 150 | {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U |
| 151 | {VK_FORMAT_UNDEFINED}, // R16S | 151 | {VK_FORMAT_UNDEFINED}, // R16S |
| 152 | {VK_FORMAT_UNDEFINED}, // R16UI | 152 | {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI |
| 153 | {VK_FORMAT_UNDEFINED}, // R16I | 153 | {VK_FORMAT_UNDEFINED}, // R16I |
| 154 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 | 154 | {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 |
| 155 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F | 155 | {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 750e5a0ca..9fd8ac3f6 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType | |||
| 73 | 73 | ||
| 74 | std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | 74 | std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( |
| 75 | vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { | 75 | vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { |
| 76 | static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, | 76 | static constexpr std::array formats{ |
| 77 | VK_FORMAT_A8B8G8R8_UINT_PACK32, | 77 | VK_FORMAT_A8B8G8R8_UNORM_PACK32, |
| 78 | VK_FORMAT_A8B8G8R8_SNORM_PACK32, | 78 | VK_FORMAT_A8B8G8R8_UINT_PACK32, |
| 79 | VK_FORMAT_A8B8G8R8_SRGB_PACK32, | 79 | VK_FORMAT_A8B8G8R8_SNORM_PACK32, |
| 80 | VK_FORMAT_B5G6R5_UNORM_PACK16, | 80 | VK_FORMAT_A8B8G8R8_SRGB_PACK32, |
| 81 | VK_FORMAT_A2B10G10R10_UNORM_PACK32, | 81 | VK_FORMAT_B5G6R5_UNORM_PACK16, |
| 82 | VK_FORMAT_A1R5G5B5_UNORM_PACK16, | 82 | VK_FORMAT_A2B10G10R10_UNORM_PACK32, |
| 83 | VK_FORMAT_R32G32B32A32_SFLOAT, | 83 | VK_FORMAT_A1R5G5B5_UNORM_PACK16, |
| 84 | VK_FORMAT_R32G32B32A32_UINT, | 84 | VK_FORMAT_R32G32B32A32_SFLOAT, |
| 85 | VK_FORMAT_R32G32_SFLOAT, | 85 | VK_FORMAT_R32G32B32A32_UINT, |
| 86 | VK_FORMAT_R32G32_UINT, | 86 | VK_FORMAT_R32G32_SFLOAT, |
| 87 | VK_FORMAT_R16G16B16A16_UINT, | 87 | VK_FORMAT_R32G32_UINT, |
| 88 | VK_FORMAT_R16G16B16A16_SNORM, | 88 | VK_FORMAT_R16G16B16A16_UINT, |
| 89 | VK_FORMAT_R16G16B16A16_UNORM, | 89 | VK_FORMAT_R16G16B16A16_SNORM, |
| 90 | VK_FORMAT_R16G16_UNORM, | 90 | VK_FORMAT_R16G16B16A16_UNORM, |
| 91 | VK_FORMAT_R16G16_SNORM, | 91 | VK_FORMAT_R16G16_UNORM, |
| 92 | VK_FORMAT_R16G16_SFLOAT, | 92 | VK_FORMAT_R16G16_SNORM, |
| 93 | VK_FORMAT_R16_UNORM, | 93 | VK_FORMAT_R16G16_SFLOAT, |
| 94 | VK_FORMAT_R8G8B8A8_SRGB, | 94 | VK_FORMAT_R16_UNORM, |
| 95 | VK_FORMAT_R8G8_UNORM, | 95 | VK_FORMAT_R16_UINT, |
| 96 | VK_FORMAT_R8G8_SNORM, | 96 | VK_FORMAT_R8G8B8A8_SRGB, |
| 97 | VK_FORMAT_R8G8_UINT, | 97 | VK_FORMAT_R8G8_UNORM, |
| 98 | VK_FORMAT_R8_UNORM, | 98 | VK_FORMAT_R8G8_SNORM, |
| 99 | VK_FORMAT_R8_UINT, | 99 | VK_FORMAT_R8G8_UINT, |
| 100 | VK_FORMAT_B10G11R11_UFLOAT_PACK32, | 100 | VK_FORMAT_R8_UNORM, |
| 101 | VK_FORMAT_R32_SFLOAT, | 101 | VK_FORMAT_R8_UINT, |
| 102 | VK_FORMAT_R32_UINT, | 102 | VK_FORMAT_B10G11R11_UFLOAT_PACK32, |
| 103 | VK_FORMAT_R32_SINT, | 103 | VK_FORMAT_R32_SFLOAT, |
| 104 | VK_FORMAT_R16_SFLOAT, | 104 | VK_FORMAT_R32_UINT, |
| 105 | VK_FORMAT_R16G16B16A16_SFLOAT, | 105 | VK_FORMAT_R32_SINT, |
| 106 | VK_FORMAT_B8G8R8A8_UNORM, | 106 | VK_FORMAT_R16_SFLOAT, |
| 107 | VK_FORMAT_B8G8R8A8_SRGB, | 107 | VK_FORMAT_R16G16B16A16_SFLOAT, |
| 108 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, | 108 | VK_FORMAT_B8G8R8A8_UNORM, |
| 109 | VK_FORMAT_D32_SFLOAT, | 109 | VK_FORMAT_B8G8R8A8_SRGB, |
| 110 | VK_FORMAT_D16_UNORM, | 110 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, |
| 111 | VK_FORMAT_D16_UNORM_S8_UINT, | 111 | VK_FORMAT_D32_SFLOAT, |
| 112 | VK_FORMAT_D24_UNORM_S8_UINT, | 112 | VK_FORMAT_D16_UNORM, |
| 113 | VK_FORMAT_D32_SFLOAT_S8_UINT, | 113 | VK_FORMAT_D16_UNORM_S8_UINT, |
| 114 | VK_FORMAT_BC1_RGBA_UNORM_BLOCK, | 114 | VK_FORMAT_D24_UNORM_S8_UINT, |
| 115 | VK_FORMAT_BC2_UNORM_BLOCK, | 115 | VK_FORMAT_D32_SFLOAT_S8_UINT, |
| 116 | VK_FORMAT_BC3_UNORM_BLOCK, | 116 | VK_FORMAT_BC1_RGBA_UNORM_BLOCK, |
| 117 | VK_FORMAT_BC4_UNORM_BLOCK, | 117 | VK_FORMAT_BC2_UNORM_BLOCK, |
| 118 | VK_FORMAT_BC5_UNORM_BLOCK, | 118 | VK_FORMAT_BC3_UNORM_BLOCK, |
| 119 | VK_FORMAT_BC5_SNORM_BLOCK, | 119 | VK_FORMAT_BC4_UNORM_BLOCK, |
| 120 | VK_FORMAT_BC7_UNORM_BLOCK, | 120 | VK_FORMAT_BC5_UNORM_BLOCK, |
| 121 | VK_FORMAT_BC6H_UFLOAT_BLOCK, | 121 | VK_FORMAT_BC5_SNORM_BLOCK, |
| 122 | VK_FORMAT_BC6H_SFLOAT_BLOCK, | 122 | VK_FORMAT_BC7_UNORM_BLOCK, |
| 123 | VK_FORMAT_BC1_RGBA_SRGB_BLOCK, | 123 | VK_FORMAT_BC6H_UFLOAT_BLOCK, |
| 124 | VK_FORMAT_BC2_SRGB_BLOCK, | 124 | VK_FORMAT_BC6H_SFLOAT_BLOCK, |
| 125 | VK_FORMAT_BC3_SRGB_BLOCK, | 125 | VK_FORMAT_BC1_RGBA_SRGB_BLOCK, |
| 126 | VK_FORMAT_BC7_SRGB_BLOCK, | 126 | VK_FORMAT_BC2_SRGB_BLOCK, |
| 127 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, | 127 | VK_FORMAT_BC3_SRGB_BLOCK, |
| 128 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, | 128 | VK_FORMAT_BC7_SRGB_BLOCK, |
| 129 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, | 129 | VK_FORMAT_ASTC_4x4_SRGB_BLOCK, |
| 130 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, | 130 | VK_FORMAT_ASTC_8x8_SRGB_BLOCK, |
| 131 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, | 131 | VK_FORMAT_ASTC_8x5_SRGB_BLOCK, |
| 132 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, | 132 | VK_FORMAT_ASTC_5x4_SRGB_BLOCK, |
| 133 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, | 133 | VK_FORMAT_ASTC_5x5_UNORM_BLOCK, |
| 134 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, | 134 | VK_FORMAT_ASTC_5x5_SRGB_BLOCK, |
| 135 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, | 135 | VK_FORMAT_ASTC_10x8_UNORM_BLOCK, |
| 136 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, | 136 | VK_FORMAT_ASTC_10x8_SRGB_BLOCK, |
| 137 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, | 137 | VK_FORMAT_ASTC_6x6_UNORM_BLOCK, |
| 138 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, | 138 | VK_FORMAT_ASTC_6x6_SRGB_BLOCK, |
| 139 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, | 139 | VK_FORMAT_ASTC_10x10_UNORM_BLOCK, |
| 140 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, | 140 | VK_FORMAT_ASTC_10x10_SRGB_BLOCK, |
| 141 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, | 141 | VK_FORMAT_ASTC_12x12_UNORM_BLOCK, |
| 142 | VK_FORMAT_ASTC_8x6_SRGB_BLOCK, | 142 | VK_FORMAT_ASTC_12x12_SRGB_BLOCK, |
| 143 | VK_FORMAT_ASTC_6x5_UNORM_BLOCK, | 143 | VK_FORMAT_ASTC_8x6_UNORM_BLOCK, |
| 144 | VK_FORMAT_ASTC_6x5_SRGB_BLOCK, | 144 | VK_FORMAT_ASTC_8x6_SRGB_BLOCK, |
| 145 | VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; | 145 | VK_FORMAT_ASTC_6x5_UNORM_BLOCK, |
| 146 | VK_FORMAT_ASTC_6x5_SRGB_BLOCK, | ||
| 147 | VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, | ||
| 148 | }; | ||
| 146 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; | 149 | std::unordered_map<VkFormat, VkFormatProperties> format_properties; |
| 147 | for (const auto format : formats) { | 150 | for (const auto format : formats) { |
| 148 | format_properties.emplace(format, physical.GetFormatProperties(format)); | 151 | format_properties.emplace(format, physical.GetFormatProperties(format)); |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5c7b7945..65a1c6245 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 312 | ASSERT(point_size != 0.0f); | 312 | ASSERT(point_size != 0.0f); |
| 313 | } | 313 | } |
| 314 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | 314 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { |
| 315 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); | 315 | const auto& attribute = fixed_state.vertex_input.attributes[i]; |
| 316 | specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; | ||
| 317 | specialization.attribute_types[i] = attribute.Type(); | ||
| 316 | } | 318 | } |
| 317 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 319 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 318 | 320 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index be5b77fae..a3d992ed3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex | |||
| 877 | 877 | ||
| 878 | for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | 878 | for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { |
| 879 | const auto& attrib = regs.vertex_attrib_format[index]; | 879 | const auto& attrib = regs.vertex_attrib_format[index]; |
| 880 | if (!attrib.IsValid()) { | 880 | if (attrib.IsConstant()) { |
| 881 | vertex_input.SetAttribute(index, false, 0, 0, {}, {}); | 881 | vertex_input.SetAttribute(index, false, 0, 0, {}, {}); |
| 882 | continue; | 882 | continue; |
| 883 | } | 883 | } |
| 884 | |||
| 885 | [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; | ||
| 886 | ASSERT(buffer.IsEnabled()); | ||
| 887 | |||
| 888 | vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), | 884 | vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), |
| 889 | attrib.size.Value()); | 885 | attrib.size.Value()); |
| 890 | } | 886 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 890f34a2c..a13e8baa7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -741,8 +741,10 @@ private: | |||
| 741 | if (!IsGenericAttribute(index)) { | 741 | if (!IsGenericAttribute(index)) { |
| 742 | continue; | 742 | continue; |
| 743 | } | 743 | } |
| 744 | |||
| 745 | const u32 location = GetGenericAttributeLocation(index); | 744 | const u32 location = GetGenericAttributeLocation(index); |
| 745 | if (!IsAttributeEnabled(location)) { | ||
| 746 | continue; | ||
| 747 | } | ||
| 746 | const auto type_descriptor = GetAttributeType(location); | 748 | const auto type_descriptor = GetAttributeType(location); |
| 747 | Id type; | 749 | Id type; |
| 748 | if (IsInputAttributeArray()) { | 750 | if (IsInputAttributeArray()) { |
| @@ -986,6 +988,10 @@ private: | |||
| 986 | return stage == ShaderType::TesselationControl; | 988 | return stage == ShaderType::TesselationControl; |
| 987 | } | 989 | } |
| 988 | 990 | ||
| 991 | bool IsAttributeEnabled(u32 location) const { | ||
| 992 | return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; | ||
| 993 | } | ||
| 994 | |||
| 989 | u32 GetNumInputVertices() const { | 995 | u32 GetNumInputVertices() const { |
| 990 | switch (stage) { | 996 | switch (stage) { |
| 991 | case ShaderType::Geometry: | 997 | case ShaderType::Geometry: |
| @@ -1201,16 +1207,20 @@ private: | |||
| 1201 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | 1207 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); |
| 1202 | return {v_float_zero, Type::Float}; | 1208 | return {v_float_zero, Type::Float}; |
| 1203 | default: | 1209 | default: |
| 1204 | if (IsGenericAttribute(attribute)) { | 1210 | if (!IsGenericAttribute(attribute)) { |
| 1205 | const u32 location = GetGenericAttributeLocation(attribute); | 1211 | break; |
| 1206 | const auto type_descriptor = GetAttributeType(location); | ||
| 1207 | const Type type = type_descriptor.type; | ||
| 1208 | const Id attribute_id = input_attributes.at(attribute); | ||
| 1209 | const std::vector elements = {element}; | ||
| 1210 | const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); | ||
| 1211 | return {OpLoad(GetTypeDefinition(type), pointer), type}; | ||
| 1212 | } | 1212 | } |
| 1213 | break; | 1213 | const u32 location = GetGenericAttributeLocation(attribute); |
| 1214 | if (!IsAttributeEnabled(location)) { | ||
| 1215 | // Disabled attributes (also known as constant attributes) always return zero. | ||
| 1216 | return {v_float_zero, Type::Float}; | ||
| 1217 | } | ||
| 1218 | const auto type_descriptor = GetAttributeType(location); | ||
| 1219 | const Type type = type_descriptor.type; | ||
| 1220 | const Id attribute_id = input_attributes.at(attribute); | ||
| 1221 | const std::vector elements = {element}; | ||
| 1222 | const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); | ||
| 1223 | return {OpLoad(GetTypeDefinition(type), pointer), type}; | ||
| 1214 | } | 1224 | } |
| 1215 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | 1225 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); |
| 1216 | return {v_float_zero, Type::Float}; | 1226 | return {v_float_zero, Type::Float}; |
| @@ -2215,8 +2225,8 @@ private: | |||
| 2215 | return {}; | 2225 | return {}; |
| 2216 | } | 2226 | } |
| 2217 | 2227 | ||
| 2218 | Expression MemoryBarrierGL(Operation) { | 2228 | template <spv::Scope scope> |
| 2219 | const auto scope = spv::Scope::Device; | 2229 | Expression MemoryBarrier(Operation) { |
| 2220 | const auto semantics = | 2230 | const auto semantics = |
| 2221 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | | 2231 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | |
| 2222 | spv::MemorySemanticsMask::WorkgroupMemory | | 2232 | spv::MemorySemanticsMask::WorkgroupMemory | |
| @@ -2681,7 +2691,8 @@ private: | |||
| 2681 | &SPIRVDecompiler::ShuffleIndexed, | 2691 | &SPIRVDecompiler::ShuffleIndexed, |
| 2682 | 2692 | ||
| 2683 | &SPIRVDecompiler::Barrier, | 2693 | &SPIRVDecompiler::Barrier, |
| 2684 | &SPIRVDecompiler::MemoryBarrierGL, | 2694 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, |
| 2695 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, | ||
| 2685 | }; | 2696 | }; |
| 2686 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2697 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2687 | 2698 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f4c05ac3c..b7af26388 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -88,7 +88,8 @@ struct Specialization final { | |||
| 88 | u32 shared_memory_size{}; | 88 | u32 shared_memory_size{}; |
| 89 | 89 | ||
| 90 | // Graphics specific | 90 | // Graphics specific |
| 91 | std::optional<float> point_size{}; | 91 | std::optional<float> point_size; |
| 92 | std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; | ||
| 92 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 93 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 93 | bool ndc_minus_one_to_one{}; | 94 | bool ndc_minus_one_to_one{}; |
| 94 | }; | 95 | }; |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 694b325e1..c0a8f233f 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 83 | return Operation(OperationCode::YNegate); | 83 | return Operation(OperationCode::YNegate); |
| 84 | case SystemVariable::InvocationInfo: | 84 | case SystemVariable::InvocationInfo: |
| 85 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); | 85 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); |
| 86 | return Immediate(0U); | 86 | return Immediate(0x00ff'0000U); |
| 87 | case SystemVariable::WscaleFactorXY: | 87 | case SystemVariable::WscaleFactorXY: |
| 88 | UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); | 88 | UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); |
| 89 | return Immediate(0U); | 89 | return Immediate(0U); |
| @@ -299,9 +299,19 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 299 | break; | 299 | break; |
| 300 | } | 300 | } |
| 301 | case OpCode::Id::MEMBAR: { | 301 | case OpCode::Id::MEMBAR: { |
| 302 | UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); | ||
| 303 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); | 302 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); |
| 304 | bb.push_back(Operation(OperationCode::MemoryBarrierGL)); | 303 | const OperationCode type = [instr] { |
| 304 | switch (instr.membar.type) { | ||
| 305 | case Tegra::Shader::MembarType::CTA: | ||
| 306 | return OperationCode::MemoryBarrierGroup; | ||
| 307 | case Tegra::Shader::MembarType::GL: | ||
| 308 | return OperationCode::MemoryBarrierGlobal; | ||
| 309 | default: | ||
| 310 | UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value())); | ||
| 311 | return OperationCode::MemoryBarrierGlobal; | ||
| 312 | } | ||
| 313 | }(); | ||
| 314 | bb.push_back(Operation(type)); | ||
| 305 | break; | 315 | break; |
| 306 | } | 316 | } |
| 307 | case OpCode::Id::DEPBAR: { | 317 | case OpCode::Id::DEPBAR: { |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c06512413..c5e5165ff 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -233,8 +233,9 @@ enum class OperationCode { | |||
| 233 | ThreadLtMask, /// () -> uint | 233 | ThreadLtMask, /// () -> uint |
| 234 | ShuffleIndexed, /// (uint value, uint index) -> uint | 234 | ShuffleIndexed, /// (uint value, uint index) -> uint |
| 235 | 235 | ||
| 236 | Barrier, /// () -> void | 236 | Barrier, /// () -> void |
| 237 | MemoryBarrierGL, /// () -> void | 237 | MemoryBarrierGroup, /// () -> void |
| 238 | MemoryBarrierGlobal, /// () -> void | ||
| 238 | 239 | ||
| 239 | Amount, | 240 | Amount, |
| 240 | }; | 241 | }; |
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7032e0059..f476f03b0 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -41,7 +41,7 @@ struct Table { | |||
| 41 | ComponentType alpha_component; | 41 | ComponentType alpha_component; |
| 42 | bool is_srgb; | 42 | bool is_srgb; |
| 43 | }; | 43 | }; |
| 44 | constexpr std::array<Table, 77> DefinitionTable = {{ | 44 | constexpr std::array<Table, 78> DefinitionTable = {{ |
| 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, | 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, |
| 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, | 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, |
| 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, | 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, |
| @@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{ | |||
| 98 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, | 98 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, |
| 99 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, | 99 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, |
| 100 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, | 100 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, |
| 101 | {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, | ||
| 101 | {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, | 102 | {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, |
| 102 | 103 | ||
| 103 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, | 104 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4ba0d2c3a..6f63217a2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <unordered_map> | 14 | #include <unordered_map> |
| 15 | #include <vector> | 15 | #include <vector> |
| 16 | 16 | ||
| 17 | #include <boost/container/small_vector.hpp> | ||
| 17 | #include <boost/icl/interval_map.hpp> | 18 | #include <boost/icl/interval_map.hpp> |
| 18 | #include <boost/range/iterator_range.hpp> | 19 | #include <boost/range/iterator_range.hpp> |
| 19 | 20 | ||
| @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | |||
| 53 | 54 | ||
| 54 | template <typename TSurface, typename TView> | 55 | template <typename TSurface, typename TView> |
| 55 | class TextureCache { | 56 | class TextureCache { |
| 57 | using VectorSurface = boost::container::small_vector<TSurface, 1>; | ||
| 56 | 58 | ||
| 57 | public: | 59 | public: |
| 58 | void InvalidateRegion(VAddr addr, std::size_t size) { | 60 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| @@ -308,18 +310,20 @@ public: | |||
| 308 | dst_surface.first->MarkAsModified(true, Tick()); | 310 | dst_surface.first->MarkAsModified(true, Tick()); |
| 309 | } | 311 | } |
| 310 | 312 | ||
| 311 | TSurface TryFindFramebufferSurface(VAddr addr) { | 313 | TSurface TryFindFramebufferSurface(VAddr addr) const { |
| 312 | if (!addr) { | 314 | if (!addr) { |
| 313 | return nullptr; | 315 | return nullptr; |
| 314 | } | 316 | } |
| 315 | const VAddr page = addr >> registry_page_bits; | 317 | const VAddr page = addr >> registry_page_bits; |
| 316 | std::vector<TSurface>& list = registry[page]; | 318 | const auto it = registry.find(page); |
| 317 | for (auto& surface : list) { | 319 | if (it == registry.end()) { |
| 318 | if (surface->GetCpuAddr() == addr) { | 320 | return nullptr; |
| 319 | return surface; | ||
| 320 | } | ||
| 321 | } | 321 | } |
| 322 | return nullptr; | 322 | const auto& list = it->second; |
| 323 | const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { | ||
| 324 | return surface->GetCpuAddr() == addr; | ||
| 325 | }); | ||
| 326 | return found != list.end() ? *found : nullptr; | ||
| 323 | } | 327 | } |
| 324 | 328 | ||
| 325 | u64 Tick() { | 329 | u64 Tick() { |
| @@ -498,7 +502,7 @@ private: | |||
| 498 | * @param untopological Indicates to the recycler that the texture has no way | 502 | * @param untopological Indicates to the recycler that the texture has no way |
| 499 | * to match the overlaps due to topological reasons. | 503 | * to match the overlaps due to topological reasons. |
| 500 | **/ | 504 | **/ |
| 501 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | 505 | RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, |
| 502 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | 506 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { |
| 503 | if (Settings::IsGPULevelExtreme()) { | 507 | if (Settings::IsGPULevelExtreme()) { |
| 504 | return RecycleStrategy::Flush; | 508 | return RecycleStrategy::Flush; |
| @@ -538,9 +542,8 @@ private: | |||
| 538 | * @param untopological Indicates to the recycler that the texture has no way to match the | 542 | * @param untopological Indicates to the recycler that the texture has no way to match the |
| 539 | * overlaps due to topological reasons. | 543 | * overlaps due to topological reasons. |
| 540 | **/ | 544 | **/ |
| 541 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, | 545 | std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, |
| 542 | const SurfaceParams& params, const GPUVAddr gpu_addr, | 546 | const GPUVAddr gpu_addr, const bool preserve_contents, |
| 543 | const bool preserve_contents, | ||
| 544 | const MatchTopologyResult untopological) { | 547 | const MatchTopologyResult untopological) { |
| 545 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); | 548 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); |
| 546 | for (auto& surface : overlaps) { | 549 | for (auto& surface : overlaps) { |
| @@ -650,7 +653,7 @@ private: | |||
| 650 | * @param params The parameters on the new surface. | 653 | * @param params The parameters on the new surface. |
| 651 | * @param gpu_addr The starting address of the new surface. | 654 | * @param gpu_addr The starting address of the new surface. |
| 652 | **/ | 655 | **/ |
| 653 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, | 656 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, |
| 654 | const SurfaceParams& params, | 657 | const SurfaceParams& params, |
| 655 | GPUVAddr gpu_addr) { | 658 | GPUVAddr gpu_addr) { |
| 656 | if (params.target == SurfaceTarget::Texture3D) { | 659 | if (params.target == SurfaceTarget::Texture3D) { |
| @@ -726,7 +729,7 @@ private: | |||
| 726 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | 729 | * @param preserve_contents Indicates that the new surface should be loaded from memory or |
| 727 | * left blank. | 730 | * left blank. |
| 728 | */ | 731 | */ |
| 729 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | 732 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, |
| 730 | const SurfaceParams& params, | 733 | const SurfaceParams& params, |
| 731 | const GPUVAddr gpu_addr, | 734 | const GPUVAddr gpu_addr, |
| 732 | const VAddr cpu_addr, | 735 | const VAddr cpu_addr, |
| @@ -828,7 +831,7 @@ private: | |||
| 828 | TSurface& current_surface = iter->second; | 831 | TSurface& current_surface = iter->second; |
| 829 | const auto topological_result = current_surface->MatchesTopology(params); | 832 | const auto topological_result = current_surface->MatchesTopology(params); |
| 830 | if (topological_result != MatchTopologyResult::FullMatch) { | 833 | if (topological_result != MatchTopologyResult::FullMatch) { |
| 831 | std::vector<TSurface> overlaps{current_surface}; | 834 | VectorSurface overlaps{current_surface}; |
| 832 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 835 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, |
| 833 | topological_result); | 836 | topological_result); |
| 834 | } | 837 | } |
| @@ -1141,23 +1144,25 @@ private: | |||
| 1141 | } | 1144 | } |
| 1142 | } | 1145 | } |
| 1143 | 1146 | ||
| 1144 | std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | 1147 | VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { |
| 1145 | if (size == 0) { | 1148 | if (size == 0) { |
| 1146 | return {}; | 1149 | return {}; |
| 1147 | } | 1150 | } |
| 1148 | const VAddr cpu_addr_end = cpu_addr + size; | 1151 | const VAddr cpu_addr_end = cpu_addr + size; |
| 1149 | VAddr start = cpu_addr >> registry_page_bits; | ||
| 1150 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | 1152 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; |
| 1151 | std::vector<TSurface> surfaces; | 1153 | VectorSurface surfaces; |
| 1152 | while (start <= end) { | 1154 | for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { |
| 1153 | std::vector<TSurface>& list = registry[start]; | 1155 | const auto it = registry.find(start); |
| 1154 | for (auto& surface : list) { | 1156 | if (it == registry.end()) { |
| 1155 | if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { | 1157 | continue; |
| 1156 | surface->MarkAsPicked(true); | 1158 | } |
| 1157 | surfaces.push_back(surface); | 1159 | for (auto& surface : it->second) { |
| 1160 | if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { | ||
| 1161 | continue; | ||
| 1158 | } | 1162 | } |
| 1163 | surface->MarkAsPicked(true); | ||
| 1164 | surfaces.push_back(surface); | ||
| 1159 | } | 1165 | } |
| 1160 | start++; | ||
| 1161 | } | 1166 | } |
| 1162 | for (auto& surface : surfaces) { | 1167 | for (auto& surface : surfaces) { |
| 1163 | surface->MarkAsPicked(false); | 1168 | surface->MarkAsPicked(false); |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 1adf8932b..1f5e43043 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -106,6 +106,9 @@ public: | |||
| 106 | format.setVersion(4, 3); | 106 | format.setVersion(4, 3); |
| 107 | format.setProfile(QSurfaceFormat::CompatibilityProfile); | 107 | format.setProfile(QSurfaceFormat::CompatibilityProfile); |
| 108 | format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); | 108 | format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); |
| 109 | if (Settings::values.renderer_debug) { | ||
| 110 | format.setOption(QSurfaceFormat::FormatOption::DebugContext); | ||
| 111 | } | ||
| 109 | // TODO: expose a setting for buffer value (ie default/single/double/triple) | 112 | // TODO: expose a setting for buffer value (ie default/single/double/triple) |
| 110 | format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); | 113 | format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); |
| 111 | format.setSwapInterval(0); | 114 | format.setSwapInterval(0); |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 411e7e647..09cc0a3b5 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | |||
| @@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen) | |||
| 98 | SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); | 98 | SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); |
| 99 | SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); | 99 | SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); |
| 100 | SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); | 100 | SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); |
| 101 | if (Settings::values.renderer_debug) { | ||
| 102 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); | ||
| 103 | } | ||
| 101 | SDL_GL_SetSwapInterval(0); | 104 | SDL_GL_SetSwapInterval(0); |
| 102 | 105 | ||
| 103 | std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, | 106 | std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, |