diff options
Diffstat (limited to 'src')
32 files changed, 648 insertions, 157 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index e6769a5f3..24b7a083c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -123,6 +123,8 @@ add_library(common STATIC | |||
| 123 | lz4_compression.cpp | 123 | lz4_compression.cpp |
| 124 | lz4_compression.h | 124 | lz4_compression.h |
| 125 | math_util.h | 125 | math_util.h |
| 126 | memory_detect.cpp | ||
| 127 | memory_detect.h | ||
| 126 | memory_hook.cpp | 128 | memory_hook.cpp |
| 127 | memory_hook.h | 129 | memory_hook.h |
| 128 | microprofile.cpp | 130 | microprofile.cpp |
| @@ -169,10 +171,12 @@ if(ARCHITECTURE_x86_64) | |||
| 169 | PRIVATE | 171 | PRIVATE |
| 170 | x64/cpu_detect.cpp | 172 | x64/cpu_detect.cpp |
| 171 | x64/cpu_detect.h | 173 | x64/cpu_detect.h |
| 174 | x64/xbyak_abi.h | ||
| 175 | x64/xbyak_util.h | ||
| 172 | ) | 176 | ) |
| 173 | endif() | 177 | endif() |
| 174 | 178 | ||
| 175 | create_target_directory_groups(common) | 179 | create_target_directory_groups(common) |
| 176 | 180 | ||
| 177 | target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) | 181 | target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) |
| 178 | target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd) | 182 | target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak) |
diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp new file mode 100644 index 000000000..3fdc309a2 --- /dev/null +++ b/src/common/memory_detect.cpp | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #ifdef _WIN32 | ||
| 6 | // clang-format off | ||
| 7 | #include <windows.h> | ||
| 8 | #include <sysinfoapi.h> | ||
| 9 | // clang-format on | ||
| 10 | #else | ||
| 11 | #include <sys/types.h> | ||
| 12 | #ifdef __APPLE__ | ||
| 13 | #include <sys/sysctl.h> | ||
| 14 | #else | ||
| 15 | #include <sys/sysinfo.h> | ||
| 16 | #endif | ||
| 17 | #endif | ||
| 18 | |||
| 19 | #include "common/memory_detect.h" | ||
| 20 | |||
| 21 | namespace Common { | ||
| 22 | |||
| 23 | // Detects the RAM and Swapfile sizes | ||
| 24 | static MemoryInfo Detect() { | ||
| 25 | MemoryInfo mem_info{}; | ||
| 26 | |||
| 27 | #ifdef _WIN32 | ||
| 28 | MEMORYSTATUSEX memorystatus; | ||
| 29 | memorystatus.dwLength = sizeof(memorystatus); | ||
| 30 | GlobalMemoryStatusEx(&memorystatus); | ||
| 31 | mem_info.TotalPhysicalMemory = memorystatus.ullTotalPhys; | ||
| 32 | mem_info.TotalSwapMemory = memorystatus.ullTotalPageFile - mem_info.TotalPhysicalMemory; | ||
| 33 | #elif defined(__APPLE__) | ||
| 34 | u64 ramsize; | ||
| 35 | struct xsw_usage vmusage; | ||
| 36 | std::size_t sizeof_ramsize = sizeof(ramsize); | ||
| 37 | std::size_t sizeof_vmusage = sizeof(vmusage); | ||
| 38 | // hw and vm are defined in sysctl.h | ||
| 39 | // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471 | ||
| 40 | // sysctlbyname(const char *, void *, size_t *, void *, size_t); | ||
| 41 | sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0); | ||
| 42 | sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0); | ||
| 43 | mem_info.TotalPhysicalMemory = ramsize; | ||
| 44 | mem_info.TotalSwapMemory = vmusage.xsu_total; | ||
| 45 | #else | ||
| 46 | struct sysinfo meminfo; | ||
| 47 | sysinfo(&meminfo); | ||
| 48 | mem_info.TotalPhysicalMemory = meminfo.totalram; | ||
| 49 | mem_info.TotalSwapMemory = meminfo.totalswap; | ||
| 50 | #endif | ||
| 51 | |||
| 52 | return mem_info; | ||
| 53 | } | ||
| 54 | |||
| 55 | const MemoryInfo& GetMemInfo() { | ||
| 56 | static MemoryInfo mem_info = Detect(); | ||
| 57 | return mem_info; | ||
| 58 | } | ||
| 59 | |||
| 60 | } // namespace Common \ No newline at end of file | ||
diff --git a/src/common/memory_detect.h b/src/common/memory_detect.h new file mode 100644 index 000000000..a73c0f3f4 --- /dev/null +++ b/src/common/memory_detect.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace Common { | ||
| 10 | |||
| 11 | struct MemoryInfo { | ||
| 12 | u64 TotalPhysicalMemory{}; | ||
| 13 | u64 TotalSwapMemory{}; | ||
| 14 | }; | ||
| 15 | |||
| 16 | /** | ||
| 17 | * Gets the memory info of the host system | ||
| 18 | * @return Reference to a MemoryInfo struct with the physical and swap memory sizes in bytes | ||
| 19 | */ | ||
| 20 | const MemoryInfo& GetMemInfo(); | ||
| 21 | |||
| 22 | } // namespace Common \ No newline at end of file | ||
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h new file mode 100644 index 000000000..794da8a52 --- /dev/null +++ b/src/common/x64/xbyak_abi.h | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <bitset> | ||
| 8 | #include <initializer_list> | ||
| 9 | #include <xbyak.h> | ||
| 10 | #include "common/assert.h" | ||
| 11 | |||
| 12 | namespace Common::X64 { | ||
| 13 | |||
| 14 | inline int RegToIndex(const Xbyak::Reg& reg) { | ||
| 15 | using Kind = Xbyak::Reg::Kind; | ||
| 16 | ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, | ||
| 17 | "RegSet only support GPRs and XMM registers."); | ||
| 18 | ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15."); | ||
| 19 | return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); | ||
| 20 | } | ||
| 21 | |||
| 22 | inline Xbyak::Reg64 IndexToReg64(int reg_index) { | ||
| 23 | ASSERT(reg_index < 16); | ||
| 24 | return Xbyak::Reg64(reg_index); | ||
| 25 | } | ||
| 26 | |||
| 27 | inline Xbyak::Xmm IndexToXmm(int reg_index) { | ||
| 28 | ASSERT(reg_index >= 16 && reg_index < 32); | ||
| 29 | return Xbyak::Xmm(reg_index - 16); | ||
| 30 | } | ||
| 31 | |||
| 32 | inline Xbyak::Reg IndexToReg(int reg_index) { | ||
| 33 | if (reg_index < 16) { | ||
| 34 | return IndexToReg64(reg_index); | ||
| 35 | } else { | ||
| 36 | return IndexToXmm(reg_index); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | |||
| 40 | inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { | ||
| 41 | std::bitset<32> bits; | ||
| 42 | for (const Xbyak::Reg& reg : regs) { | ||
| 43 | bits[RegToIndex(reg)] = true; | ||
| 44 | } | ||
| 45 | return bits; | ||
| 46 | } | ||
| 47 | |||
// Mask selecting the general purpose register slots (bits 0-15) of a register set.
const std::bitset<32> ABI_ALL_GPRS{0x0000FFFF};
// Mask selecting the XMM register slots (bits 16-31) of a register set.
const std::bitset<32> ABI_ALL_XMMS{0xFFFF0000};
| 50 | |||
| 51 | #ifdef _WIN32 | ||
| 52 | |||
| 53 | // Microsoft x64 ABI | ||
| 54 | const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; | ||
| 55 | const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; | ||
| 56 | const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; | ||
| 57 | const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; | ||
| 58 | const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; | ||
| 59 | |||
| 60 | const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ | ||
| 61 | // GPRs | ||
| 62 | Xbyak::util::rcx, | ||
| 63 | Xbyak::util::rdx, | ||
| 64 | Xbyak::util::r8, | ||
| 65 | Xbyak::util::r9, | ||
| 66 | Xbyak::util::r10, | ||
| 67 | Xbyak::util::r11, | ||
| 68 | // XMMs | ||
| 69 | Xbyak::util::xmm0, | ||
| 70 | Xbyak::util::xmm1, | ||
| 71 | Xbyak::util::xmm2, | ||
| 72 | Xbyak::util::xmm3, | ||
| 73 | Xbyak::util::xmm4, | ||
| 74 | Xbyak::util::xmm5, | ||
| 75 | }); | ||
| 76 | |||
| 77 | const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ | ||
| 78 | // GPRs | ||
| 79 | Xbyak::util::rbx, | ||
| 80 | Xbyak::util::rsi, | ||
| 81 | Xbyak::util::rdi, | ||
| 82 | Xbyak::util::rbp, | ||
| 83 | Xbyak::util::r12, | ||
| 84 | Xbyak::util::r13, | ||
| 85 | Xbyak::util::r14, | ||
| 86 | Xbyak::util::r15, | ||
| 87 | // XMMs | ||
| 88 | Xbyak::util::xmm6, | ||
| 89 | Xbyak::util::xmm7, | ||
| 90 | Xbyak::util::xmm8, | ||
| 91 | Xbyak::util::xmm9, | ||
| 92 | Xbyak::util::xmm10, | ||
| 93 | Xbyak::util::xmm11, | ||
| 94 | Xbyak::util::xmm12, | ||
| 95 | Xbyak::util::xmm13, | ||
| 96 | Xbyak::util::xmm14, | ||
| 97 | Xbyak::util::xmm15, | ||
| 98 | }); | ||
| 99 | |||
| 100 | constexpr size_t ABI_SHADOW_SPACE = 0x20; | ||
| 101 | |||
| 102 | #else | ||
| 103 | |||
| 104 | // System V x86-64 ABI | ||
| 105 | const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; | ||
| 106 | const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; | ||
| 107 | const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; | ||
| 108 | const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; | ||
| 109 | const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; | ||
| 110 | |||
| 111 | const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({ | ||
| 112 | // GPRs | ||
| 113 | Xbyak::util::rcx, | ||
| 114 | Xbyak::util::rdx, | ||
| 115 | Xbyak::util::rdi, | ||
| 116 | Xbyak::util::rsi, | ||
| 117 | Xbyak::util::r8, | ||
| 118 | Xbyak::util::r9, | ||
| 119 | Xbyak::util::r10, | ||
| 120 | Xbyak::util::r11, | ||
| 121 | // XMMs | ||
| 122 | Xbyak::util::xmm0, | ||
| 123 | Xbyak::util::xmm1, | ||
| 124 | Xbyak::util::xmm2, | ||
| 125 | Xbyak::util::xmm3, | ||
| 126 | Xbyak::util::xmm4, | ||
| 127 | Xbyak::util::xmm5, | ||
| 128 | Xbyak::util::xmm6, | ||
| 129 | Xbyak::util::xmm7, | ||
| 130 | Xbyak::util::xmm8, | ||
| 131 | Xbyak::util::xmm9, | ||
| 132 | Xbyak::util::xmm10, | ||
| 133 | Xbyak::util::xmm11, | ||
| 134 | Xbyak::util::xmm12, | ||
| 135 | Xbyak::util::xmm13, | ||
| 136 | Xbyak::util::xmm14, | ||
| 137 | Xbyak::util::xmm15, | ||
| 138 | }); | ||
| 139 | |||
| 140 | const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({ | ||
| 141 | // GPRs | ||
| 142 | Xbyak::util::rbx, | ||
| 143 | Xbyak::util::rbp, | ||
| 144 | Xbyak::util::r12, | ||
| 145 | Xbyak::util::r13, | ||
| 146 | Xbyak::util::r14, | ||
| 147 | Xbyak::util::r15, | ||
| 148 | }); | ||
| 149 | |||
| 150 | constexpr size_t ABI_SHADOW_SPACE = 0; | ||
| 151 | |||
| 152 | #endif | ||
| 153 | |||
| 154 | inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment, | ||
| 155 | size_t needed_frame_size, s32* out_subtraction, | ||
| 156 | s32* out_xmm_offset) { | ||
| 157 | const auto count = (regs & ABI_ALL_GPRS).count(); | ||
| 158 | rsp_alignment -= count * 8; | ||
| 159 | size_t subtraction = 0; | ||
| 160 | const auto xmm_count = (regs & ABI_ALL_XMMS).count(); | ||
| 161 | if (xmm_count) { | ||
| 162 | // If we have any XMMs to save, we must align the stack here. | ||
| 163 | subtraction = rsp_alignment & 0xF; | ||
| 164 | } | ||
| 165 | subtraction += 0x10 * xmm_count; | ||
| 166 | size_t xmm_base_subtraction = subtraction; | ||
| 167 | subtraction += needed_frame_size; | ||
| 168 | subtraction += ABI_SHADOW_SPACE; | ||
| 169 | // Final alignment. | ||
| 170 | rsp_alignment -= subtraction; | ||
| 171 | subtraction += rsp_alignment & 0xF; | ||
| 172 | |||
| 173 | *out_subtraction = (s32)subtraction; | ||
| 174 | *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction); | ||
| 175 | } | ||
| 176 | |||
| 177 | inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 178 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 179 | s32 subtraction, xmm_offset; | ||
| 180 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 181 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 182 | if (regs[i] && ABI_ALL_GPRS[i]) { | ||
| 183 | code.push(IndexToReg64(static_cast<int>(i))); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | if (subtraction != 0) { | ||
| 187 | code.sub(code.rsp, subtraction); | ||
| 188 | } | ||
| 189 | |||
| 190 | for (int i = 0; i < regs.count(); i++) { | ||
| 191 | if (regs.test(i) & ABI_ALL_GPRS.test(i)) { | ||
| 192 | code.push(IndexToReg64(i)); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 197 | if (regs[i] && ABI_ALL_XMMS[i]) { | ||
| 198 | code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i))); | ||
| 199 | xmm_offset += 0x10; | ||
| 200 | } | ||
| 201 | } | ||
| 202 | |||
| 203 | return ABI_SHADOW_SPACE; | ||
| 204 | } | ||
| 205 | |||
| 206 | inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 207 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 208 | s32 subtraction, xmm_offset; | ||
| 209 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 210 | |||
| 211 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 212 | if (regs[i] && ABI_ALL_XMMS[i]) { | ||
| 213 | code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]); | ||
| 214 | xmm_offset += 0x10; | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | if (subtraction != 0) { | ||
| 219 | code.add(code.rsp, subtraction); | ||
| 220 | } | ||
| 221 | |||
| 222 | // GPRs need to be popped in reverse order | ||
| 223 | for (int i = 15; i >= 0; i--) { | ||
| 224 | if (regs[i]) { | ||
| 225 | code.pop(IndexToReg64(i)); | ||
| 226 | } | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
| 230 | inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 231 | size_t rsp_alignment, | ||
| 232 | size_t needed_frame_size = 0) { | ||
| 233 | s32 subtraction, xmm_offset; | ||
| 234 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 235 | |||
| 236 | for (std::size_t i = 0; i < regs.size(); ++i) { | ||
| 237 | if (regs[i] && ABI_ALL_GPRS[i]) { | ||
| 238 | code.push(IndexToReg64(static_cast<int>(i))); | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | if (subtraction != 0) { | ||
| 243 | code.sub(code.rsp, subtraction); | ||
| 244 | } | ||
| 245 | |||
| 246 | return ABI_SHADOW_SPACE; | ||
| 247 | } | ||
| 248 | |||
| 249 | inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs, | ||
| 250 | size_t rsp_alignment, size_t needed_frame_size = 0) { | ||
| 251 | s32 subtraction, xmm_offset; | ||
| 252 | ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset); | ||
| 253 | |||
| 254 | if (subtraction != 0) { | ||
| 255 | code.add(code.rsp, subtraction); | ||
| 256 | } | ||
| 257 | |||
| 258 | // GPRs need to be popped in reverse order | ||
| 259 | for (int i = 15; i >= 0; i--) { | ||
| 260 | if (regs[i]) { | ||
| 261 | code.pop(IndexToReg64(i)); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | } // namespace Common::X64 | ||
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h new file mode 100644 index 000000000..df17f8cbe --- /dev/null +++ b/src/common/x64/xbyak_util.h | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2016 Citra Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <type_traits> | ||
| 8 | #include <xbyak.h> | ||
| 9 | #include "common/x64/xbyak_abi.h" | ||
| 10 | |||
| 11 | namespace Common::X64 { | ||
| 12 | |||
// Constants for use with cmpps/cmpss
// Each value is the comparison predicate passed as the immediate operand of the instruction.
enum {
    CMP_EQ = 0,    // equal
    CMP_LT = 1,    // less than
    CMP_LE = 2,    // less than or equal
    CMP_UNORD = 3, // unordered
    CMP_NEQ = 4,   // not equal
    CMP_NLT = 5,   // not less than
    CMP_NLE = 6,   // not less than or equal
    CMP_ORD = 7,   // ordered
};
| 24 | |||
// Returns true when `target` can be reached from an instruction located at `ref` using a signed
// 32-bit displacement. The displacement is measured from `ref + 5`, i.e. from the end of a
// 5-byte instruction placed at `ref`.
constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
    // Unsigned wrap-around turns negative displacements into values near the top of the range.
    const uintptr_t displacement = target - (ref + 5);
    const bool fits_forward = displacement < 0x8000'0000ULL;
    const bool fits_backward = displacement > ~0x8000'0000ULL;
    return fits_forward || fits_backward;
}
| 29 | |||
| 30 | inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) { | ||
| 31 | return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target); | ||
| 32 | } | ||
| 33 | |||
| 34 | template <typename T> | ||
| 35 | inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) { | ||
| 36 | static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer."); | ||
| 37 | size_t addr = reinterpret_cast<size_t>(f); | ||
| 38 | if (IsWithin2G(code, addr)) { | ||
| 39 | code.call(f); | ||
| 40 | } else { | ||
| 41 | // ABI_RETURN is a safe temp register to use before a call | ||
| 42 | code.mov(ABI_RETURN, addr); | ||
| 43 | code.call(ABI_RETURN); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | } // namespace Common::X64 | ||
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp index b93aa6935..c47ff863e 100644 --- a/src/core/file_sys/patch_manager.cpp +++ b/src/core/file_sys/patch_manager.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/file_util.h" | 10 | #include "common/file_util.h" |
| 11 | #include "common/hex_util.h" | 11 | #include "common/hex_util.h" |
| 12 | #include "common/logging/log.h" | 12 | #include "common/logging/log.h" |
| 13 | #include "common/string_util.h" | ||
| 13 | #include "core/core.h" | 14 | #include "core/core.h" |
| 14 | #include "core/file_sys/content_archive.h" | 15 | #include "core/file_sys/content_archive.h" |
| 15 | #include "core/file_sys/control_metadata.h" | 16 | #include "core/file_sys/control_metadata.h" |
| @@ -48,6 +49,23 @@ std::string FormatTitleVersion(u32 version, TitleVersionFormat format) { | |||
| 48 | return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]); | 49 | return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]); |
| 49 | } | 50 | } |
| 50 | 51 | ||
| 52 | std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir, | ||
| 53 | std::string_view name) { | ||
| 54 | #ifdef _WIN32 | ||
| 55 | return dir->GetSubdirectory(name); | ||
| 56 | #else | ||
| 57 | const auto subdirs = dir->GetSubdirectories(); | ||
| 58 | for (const auto& subdir : subdirs) { | ||
| 59 | std::string dir_name = Common::ToLower(subdir->GetName()); | ||
| 60 | if (dir_name == name) { | ||
| 61 | return subdir; | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | return nullptr; | ||
| 66 | #endif | ||
| 67 | } | ||
| 68 | |||
| 51 | PatchManager::PatchManager(u64 title_id) : title_id(title_id) {} | 69 | PatchManager::PatchManager(u64 title_id) : title_id(title_id) {} |
| 52 | 70 | ||
| 53 | PatchManager::~PatchManager() = default; | 71 | PatchManager::~PatchManager() = default; |
| @@ -104,7 +122,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const { | |||
| 104 | if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end()) | 122 | if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end()) |
| 105 | continue; | 123 | continue; |
| 106 | 124 | ||
| 107 | auto exefs_dir = subdir->GetSubdirectory("exefs"); | 125 | auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs"); |
| 108 | if (exefs_dir != nullptr) | 126 | if (exefs_dir != nullptr) |
| 109 | layers.push_back(std::move(exefs_dir)); | 127 | layers.push_back(std::move(exefs_dir)); |
| 110 | } | 128 | } |
| @@ -130,7 +148,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD | |||
| 130 | if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend()) | 148 | if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend()) |
| 131 | continue; | 149 | continue; |
| 132 | 150 | ||
| 133 | auto exefs_dir = subdir->GetSubdirectory("exefs"); | 151 | auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs"); |
| 134 | if (exefs_dir != nullptr) { | 152 | if (exefs_dir != nullptr) { |
| 135 | for (const auto& file : exefs_dir->GetFiles()) { | 153 | for (const auto& file : exefs_dir->GetFiles()) { |
| 136 | if (file->GetExtension() == "ips") { | 154 | if (file->GetExtension() == "ips") { |
| @@ -295,7 +313,7 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList( | |||
| 295 | continue; | 313 | continue; |
| 296 | } | 314 | } |
| 297 | 315 | ||
| 298 | auto cheats_dir = subdir->GetSubdirectory("cheats"); | 316 | auto cheats_dir = FindSubdirectoryCaseless(subdir, "cheats"); |
| 299 | if (cheats_dir != nullptr) { | 317 | if (cheats_dir != nullptr) { |
| 300 | auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true); | 318 | auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true); |
| 301 | if (res.has_value()) { | 319 | if (res.has_value()) { |
| @@ -340,11 +358,11 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t | |||
| 340 | continue; | 358 | continue; |
| 341 | } | 359 | } |
| 342 | 360 | ||
| 343 | auto romfs_dir = subdir->GetSubdirectory("romfs"); | 361 | auto romfs_dir = FindSubdirectoryCaseless(subdir, "romfs"); |
| 344 | if (romfs_dir != nullptr) | 362 | if (romfs_dir != nullptr) |
| 345 | layers.push_back(std::move(romfs_dir)); | 363 | layers.push_back(std::move(romfs_dir)); |
| 346 | 364 | ||
| 347 | auto ext_dir = subdir->GetSubdirectory("romfs_ext"); | 365 | auto ext_dir = FindSubdirectoryCaseless(subdir, "romfs_ext"); |
| 348 | if (ext_dir != nullptr) | 366 | if (ext_dir != nullptr) |
| 349 | layers_ext.push_back(std::move(ext_dir)); | 367 | layers_ext.push_back(std::move(ext_dir)); |
| 350 | } | 368 | } |
| @@ -470,7 +488,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam | |||
| 470 | for (const auto& mod : mod_dir->GetSubdirectories()) { | 488 | for (const auto& mod : mod_dir->GetSubdirectories()) { |
| 471 | std::string types; | 489 | std::string types; |
| 472 | 490 | ||
| 473 | const auto exefs_dir = mod->GetSubdirectory("exefs"); | 491 | const auto exefs_dir = FindSubdirectoryCaseless(mod, "exefs"); |
| 474 | if (IsDirValidAndNonEmpty(exefs_dir)) { | 492 | if (IsDirValidAndNonEmpty(exefs_dir)) { |
| 475 | bool ips = false; | 493 | bool ips = false; |
| 476 | bool ipswitch = false; | 494 | bool ipswitch = false; |
| @@ -494,9 +512,9 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam | |||
| 494 | if (layeredfs) | 512 | if (layeredfs) |
| 495 | AppendCommaIfNotEmpty(types, "LayeredExeFS"); | 513 | AppendCommaIfNotEmpty(types, "LayeredExeFS"); |
| 496 | } | 514 | } |
| 497 | if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs"))) | 515 | if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "romfs"))) |
| 498 | AppendCommaIfNotEmpty(types, "LayeredFS"); | 516 | AppendCommaIfNotEmpty(types, "LayeredFS"); |
| 499 | if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats"))) | 517 | if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "cheats"))) |
| 500 | AppendCommaIfNotEmpty(types, "Cheats"); | 518 | AppendCommaIfNotEmpty(types, "Cheats"); |
| 501 | 519 | ||
| 502 | if (types.empty()) | 520 | if (types.empty()) |
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h index ec6db524d..f4cb918dd 100644 --- a/src/core/file_sys/patch_manager.h +++ b/src/core/file_sys/patch_manager.h | |||
| @@ -29,6 +29,11 @@ enum class TitleVersionFormat : u8 { | |||
| 29 | std::string FormatTitleVersion(u32 version, | 29 | std::string FormatTitleVersion(u32 version, |
| 30 | TitleVersionFormat format = TitleVersionFormat::ThreeElements); | 30 | TitleVersionFormat format = TitleVersionFormat::ThreeElements); |
| 31 | 31 | ||
| 32 | // Returns the subdirectory of dir whose name matches name, ignoring case. Returns nullptr | ||
| 33 | // if dir has no such subdirectory. | ||
| 34 | std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir, | ||
| 35 | std::string_view name); | ||
| 36 | |||
| 32 | // A centralized class to manage patches to games. | 37 | // A centralized class to manage patches to games. |
| 33 | class PatchManager { | 38 | class PatchManager { |
| 34 | public: | 39 | public: |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f00c71dae..d6ee82836 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -229,7 +229,7 @@ endif() | |||
| 229 | create_target_directory_groups(video_core) | 229 | create_target_directory_groups(video_core) |
| 230 | 230 | ||
| 231 | target_link_libraries(video_core PUBLIC common core) | 231 | target_link_libraries(video_core PUBLIC common core) |
| 232 | target_link_libraries(video_core PRIVATE glad) | 232 | target_link_libraries(video_core PRIVATE glad xbyak) |
| 233 | 233 | ||
| 234 | if (ENABLE_VULKAN) | 234 | if (ENABLE_VULKAN) |
| 235 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) | 235 | target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b5a70b9fc..13ef2e42d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -461,8 +461,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| 461 | 461 | ||
| 462 | void Maxwell3D::ProcessQueryGet() { | 462 | void Maxwell3D::ProcessQueryGet() { |
| 463 | // TODO(Subv): Support the other query units. | 463 | // TODO(Subv): Support the other query units. |
| 464 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | 464 | if (regs.query.query_get.unit != Regs::QueryUnit::Crop) { |
| 465 | "Units other than CROP are unimplemented"); | 465 | LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented"); |
| 466 | } | ||
| 466 | 467 | ||
| 467 | switch (regs.query.query_get.operation) { | 468 | switch (regs.query.query_get.operation) { |
| 468 | case Regs::QueryOperation::Release: | 469 | case Regs::QueryOperation::Release: |
| @@ -538,8 +539,8 @@ void Maxwell3D::ProcessCounterReset() { | |||
| 538 | rasterizer.ResetCounter(QueryType::SamplesPassed); | 539 | rasterizer.ResetCounter(QueryType::SamplesPassed); |
| 539 | break; | 540 | break; |
| 540 | default: | 541 | default: |
| 541 | LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", | 542 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", |
| 542 | static_cast<int>(regs.counter_reset)); | 543 | static_cast<int>(regs.counter_reset)); |
| 543 | break; | 544 | break; |
| 544 | } | 545 | } |
| 545 | } | 546 | } |
| @@ -596,8 +597,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { | |||
| 596 | system.GPU().GetTicks()); | 597 | system.GPU().GetTicks()); |
| 597 | return {}; | 598 | return {}; |
| 598 | default: | 599 | default: |
| 599 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | 600 | LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", |
| 600 | static_cast<u32>(regs.query.query_get.select.Value())); | 601 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 601 | return 1; | 602 | return 1; |
| 602 | } | 603 | } |
| 603 | } | 604 | } |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 466a911db..e1b245288 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -166,8 +166,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} { | |||
| 166 | 166 | ||
| 167 | const bool is_nvidia = vendor == "NVIDIA Corporation"; | 167 | const bool is_nvidia = vendor == "NVIDIA Corporation"; |
| 168 | const bool is_amd = vendor == "ATI Technologies Inc."; | 168 | const bool is_amd = vendor == "ATI Technologies Inc."; |
| 169 | const bool is_intel = vendor == "Intel"; | ||
| 170 | const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr; | ||
| 171 | 169 | ||
| 172 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 170 | uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 173 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 171 | shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| @@ -182,7 +180,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} { | |||
| 182 | has_variable_aoffi = TestVariableAoffi(); | 180 | has_variable_aoffi = TestVariableAoffi(); |
| 183 | has_component_indexing_bug = is_amd; | 181 | has_component_indexing_bug = is_amd; |
| 184 | has_precise_bug = TestPreciseBug(); | 182 | has_precise_bug = TestPreciseBug(); |
| 185 | has_broken_compute = is_intel_proprietary; | ||
| 186 | has_fast_buffer_sub_data = is_nvidia; | 183 | has_fast_buffer_sub_data = is_nvidia; |
| 187 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && | 184 | use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && |
| 188 | GLAD_GL_NV_compute_program5; | 185 | GLAD_GL_NV_compute_program5; |
| @@ -206,7 +203,6 @@ Device::Device(std::nullptr_t) { | |||
| 206 | has_image_load_formatted = true; | 203 | has_image_load_formatted = true; |
| 207 | has_variable_aoffi = true; | 204 | has_variable_aoffi = true; |
| 208 | has_component_indexing_bug = false; | 205 | has_component_indexing_bug = false; |
| 209 | has_broken_compute = false; | ||
| 210 | has_precise_bug = false; | 206 | has_precise_bug = false; |
| 211 | } | 207 | } |
| 212 | 208 | ||
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e915dbd86..683ed9002 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -80,10 +80,6 @@ public: | |||
| 80 | return has_precise_bug; | 80 | return has_precise_bug; |
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | bool HasBrokenCompute() const { | ||
| 84 | return has_broken_compute; | ||
| 85 | } | ||
| 86 | |||
| 87 | bool HasFastBufferSubData() const { | 83 | bool HasFastBufferSubData() const { |
| 88 | return has_fast_buffer_sub_data; | 84 | return has_fast_buffer_sub_data; |
| 89 | } | 85 | } |
| @@ -109,7 +105,6 @@ private: | |||
| 109 | bool has_variable_aoffi{}; | 105 | bool has_variable_aoffi{}; |
| 110 | bool has_component_indexing_bug{}; | 106 | bool has_component_indexing_bug{}; |
| 111 | bool has_precise_bug{}; | 107 | bool has_precise_bug{}; |
| 112 | bool has_broken_compute{}; | ||
| 113 | bool has_fast_buffer_sub_data{}; | 108 | bool has_fast_buffer_sub_data{}; |
| 114 | bool use_assembly_shaders{}; | 109 | bool use_assembly_shaders{}; |
| 115 | }; | 110 | }; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8116a5daa..3c421dd16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -655,10 +655,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 655 | } | 655 | } |
| 656 | 656 | ||
| 657 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 657 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| 658 | if (device.HasBrokenCompute()) { | ||
| 659 | return; | ||
| 660 | } | ||
| 661 | |||
| 662 | buffer_cache.Acquire(); | 658 | buffer_cache.Acquire(); |
| 663 | current_cbuf = 0; | 659 | current_cbuf = 0; |
| 664 | 660 | ||
| @@ -977,16 +973,12 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu | |||
| 977 | glBindTextureUnit(binding, 0); | 973 | glBindTextureUnit(binding, 0); |
| 978 | return; | 974 | return; |
| 979 | } | 975 | } |
| 980 | glBindTextureUnit(binding, view->GetTexture()); | 976 | const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source, |
| 981 | 977 | texture.tic.z_source, texture.tic.w_source); | |
| 982 | if (view->GetSurfaceParams().IsBuffer()) { | 978 | glBindTextureUnit(binding, handle); |
| 983 | return; | 979 | if (!view->GetSurfaceParams().IsBuffer()) { |
| 980 | glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); | ||
| 984 | } | 981 | } |
| 985 | // Apply swizzle to textures that are not buffers. | ||
| 986 | view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | ||
| 987 | texture.tic.w_source); | ||
| 988 | |||
| 989 | glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); | ||
| 990 | } | 982 | } |
| 991 | 983 | ||
| 992 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { | 984 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { |
| @@ -1015,14 +1007,11 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t | |||
| 1015 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); | 1007 | glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); |
| 1016 | return; | 1008 | return; |
| 1017 | } | 1009 | } |
| 1018 | if (!tic.IsBuffer()) { | ||
| 1019 | view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); | ||
| 1020 | } | ||
| 1021 | if (entry.is_written) { | 1010 | if (entry.is_written) { |
| 1022 | view->MarkAsModified(texture_cache.Tick()); | 1011 | view->MarkAsModified(texture_cache.Tick()); |
| 1023 | } | 1012 | } |
| 1024 | glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE, | 1013 | const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source); |
| 1025 | view->GetFormat()); | 1014 | glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat()); |
| 1026 | } | 1015 | } |
| 1027 | 1016 | ||
| 1028 | void RasterizerOpenGL::SyncViewport() { | 1017 | void RasterizerOpenGL::SyncViewport() { |
| @@ -1031,6 +1020,26 @@ void RasterizerOpenGL::SyncViewport() { | |||
| 1031 | const auto& regs = gpu.regs; | 1020 | const auto& regs = gpu.regs; |
| 1032 | 1021 | ||
| 1033 | const bool dirty_viewport = flags[Dirty::Viewports]; | 1022 | const bool dirty_viewport = flags[Dirty::Viewports]; |
| 1023 | const bool dirty_clip_control = flags[Dirty::ClipControl]; | ||
| 1024 | |||
| 1025 | if (dirty_clip_control || flags[Dirty::FrontFace]) { | ||
| 1026 | flags[Dirty::FrontFace] = false; | ||
| 1027 | |||
| 1028 | GLenum mode = MaxwellToGL::FrontFace(regs.front_face); | ||
| 1029 | if (regs.screen_y_control.triangle_rast_flip != 0 && | ||
| 1030 | regs.viewport_transform[0].scale_y < 0.0f) { | ||
| 1031 | switch (mode) { | ||
| 1032 | case GL_CW: | ||
| 1033 | mode = GL_CCW; | ||
| 1034 | break; | ||
| 1035 | case GL_CCW: | ||
| 1036 | mode = GL_CW; | ||
| 1037 | break; | ||
| 1038 | } | ||
| 1039 | } | ||
| 1040 | glFrontFace(mode); | ||
| 1041 | } | ||
| 1042 | |||
| 1034 | if (dirty_viewport || flags[Dirty::ClipControl]) { | 1043 | if (dirty_viewport || flags[Dirty::ClipControl]) { |
| 1035 | flags[Dirty::ClipControl] = false; | 1044 | flags[Dirty::ClipControl] = false; |
| 1036 | 1045 | ||
| @@ -1128,11 +1137,6 @@ void RasterizerOpenGL::SyncCullMode() { | |||
| 1128 | glDisable(GL_CULL_FACE); | 1137 | glDisable(GL_CULL_FACE); |
| 1129 | } | 1138 | } |
| 1130 | } | 1139 | } |
| 1131 | |||
| 1132 | if (flags[Dirty::FrontFace]) { | ||
| 1133 | flags[Dirty::FrontFace] = false; | ||
| 1134 | glFrontFace(MaxwellToGL::FrontFace(regs.front_face)); | ||
| 1135 | } | ||
| 1136 | } | 1140 | } |
| 1137 | 1141 | ||
| 1138 | void RasterizerOpenGL::SyncPrimitiveRestart() { | 1142 | void RasterizerOpenGL::SyncPrimitiveRestart() { |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 253484968..9cb115959 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -2344,7 +2344,12 @@ private: | |||
| 2344 | return {}; | 2344 | return {}; |
| 2345 | } | 2345 | } |
| 2346 | 2346 | ||
| 2347 | Expression MemoryBarrierGL(Operation) { | 2347 | Expression MemoryBarrierGroup(Operation) { |
| 2348 | code.AddLine("groupMemoryBarrier();"); | ||
| 2349 | return {}; | ||
| 2350 | } | ||
| 2351 | |||
| 2352 | Expression MemoryBarrierGlobal(Operation) { | ||
| 2348 | code.AddLine("memoryBarrier();"); | 2353 | code.AddLine("memoryBarrier();"); |
| 2349 | return {}; | 2354 | return {}; |
| 2350 | } | 2355 | } |
| @@ -2591,7 +2596,8 @@ private: | |||
| 2591 | &GLSLDecompiler::ShuffleIndexed, | 2596 | &GLSLDecompiler::ShuffleIndexed, |
| 2592 | 2597 | ||
| 2593 | &GLSLDecompiler::Barrier, | 2598 | &GLSLDecompiler::Barrier, |
| 2594 | &GLSLDecompiler::MemoryBarrierGL, | 2599 | &GLSLDecompiler::MemoryBarrierGroup, |
| 2600 | &GLSLDecompiler::MemoryBarrierGlobal, | ||
| 2595 | }; | 2601 | }; |
| 2596 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2602 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2597 | 2603 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 96605db84..8e754fa90 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -47,6 +47,10 @@ void ProgramManager::BindHostPipeline(GLuint pipeline) { | |||
| 47 | old_state.geometry = 0; | 47 | old_state.geometry = 0; |
| 48 | glDisable(GL_GEOMETRY_PROGRAM_NV); | 48 | glDisable(GL_GEOMETRY_PROGRAM_NV); |
| 49 | } | 49 | } |
| 50 | } else { | ||
| 51 | if (!is_graphics_bound) { | ||
| 52 | glUseProgram(0); | ||
| 53 | } | ||
| 50 | } | 54 | } |
| 51 | glBindProgramPipeline(pipeline); | 55 | glBindProgramPipeline(pipeline); |
| 52 | } | 56 | } |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 94fbd2a22..4faa8b90c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -35,7 +35,7 @@ MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | |||
| 35 | namespace { | 35 | namespace { |
| 36 | 36 | ||
| 37 | struct FormatTuple { | 37 | struct FormatTuple { |
| 38 | GLint internal_format; | 38 | GLenum internal_format; |
| 39 | GLenum format = GL_NONE; | 39 | GLenum format = GL_NONE; |
| 40 | GLenum type = GL_NONE; | 40 | GLenum type = GL_NONE; |
| 41 | }; | 41 | }; |
| @@ -238,6 +238,12 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte | |||
| 238 | return texture; | 238 | return texture; |
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source, | ||
| 242 | SwizzleSource w_source) { | ||
| 243 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 244 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 245 | } | ||
| 246 | |||
| 241 | } // Anonymous namespace | 247 | } // Anonymous namespace |
| 242 | 248 | ||
| 243 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, | 249 | CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, |
| @@ -381,7 +387,7 @@ void CachedSurface::DecorateSurfaceName() { | |||
| 381 | } | 387 | } |
| 382 | 388 | ||
| 383 | void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { | 389 | void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { |
| 384 | LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix); | 390 | LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); |
| 385 | } | 391 | } |
| 386 | 392 | ||
| 387 | View CachedSurface::CreateView(const ViewParams& view_key) { | 393 | View CachedSurface::CreateView(const ViewParams& view_key) { |
| @@ -397,14 +403,13 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr | |||
| 397 | } | 403 | } |
| 398 | 404 | ||
| 399 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, | 405 | CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, |
| 400 | const bool is_proxy) | 406 | bool is_proxy) |
| 401 | : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { | 407 | : VideoCommon::ViewBase(params), surface{surface}, |
| 402 | target = GetTextureTarget(params.target); | 408 | format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format}, |
| 403 | format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format; | 409 | target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { |
| 404 | if (!is_proxy) { | 410 | if (!is_proxy) { |
| 405 | texture_view = CreateTextureView(); | 411 | main_view = CreateTextureView(); |
| 406 | } | 412 | } |
| 407 | swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A); | ||
| 408 | } | 413 | } |
| 409 | 414 | ||
| 410 | CachedSurfaceView::~CachedSurfaceView() = default; | 415 | CachedSurfaceView::~CachedSurfaceView() = default; |
| @@ -447,27 +452,49 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { | |||
| 447 | } | 452 | } |
| 448 | } | 453 | } |
| 449 | 454 | ||
| 450 | void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, | 455 | GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source, |
| 451 | SwizzleSource z_source, SwizzleSource w_source) { | 456 | SwizzleSource z_source, SwizzleSource w_source) { |
| 452 | u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | 457 | if (GetSurfaceParams().IsBuffer()) { |
| 453 | if (new_swizzle == swizzle) | 458 | return GetTexture(); |
| 454 | return; | 459 | } |
| 455 | swizzle = new_swizzle; | 460 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); |
| 456 | const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), | 461 | if (current_swizzle == new_swizzle) { |
| 457 | GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; | 462 | return current_view; |
| 458 | const GLuint handle = GetTexture(); | 463 | } |
| 459 | const PixelFormat format = surface.GetSurfaceParams().pixel_format; | 464 | current_swizzle = new_swizzle; |
| 460 | switch (format) { | 465 | |
| 466 | const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); | ||
| 467 | OGLTextureView& view = entry->second; | ||
| 468 | if (!is_cache_miss) { | ||
| 469 | current_view = view.handle; | ||
| 470 | return view.handle; | ||
| 471 | } | ||
| 472 | view = CreateTextureView(); | ||
| 473 | current_view = view.handle; | ||
| 474 | |||
| 475 | std::array swizzle{x_source, y_source, z_source, w_source}; | ||
| 476 | |||
| 477 | switch (const PixelFormat format = GetSurfaceParams().pixel_format) { | ||
| 461 | case PixelFormat::Z24S8: | 478 | case PixelFormat::Z24S8: |
| 462 | case PixelFormat::Z32FS8: | 479 | case PixelFormat::Z32FS8: |
| 463 | case PixelFormat::S8Z24: | 480 | case PixelFormat::S8Z24: |
| 464 | glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, | 481 | UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); |
| 482 | glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE, | ||
| 465 | GetComponent(format, x_source == SwizzleSource::R)); | 483 | GetComponent(format, x_source == SwizzleSource::R)); |
| 484 | |||
| 485 | // Make sure we sample the first component | ||
| 486 | std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) { | ||
| 487 | return value == SwizzleSource::G ? SwizzleSource::R : value; | ||
| 488 | }); | ||
| 489 | [[fallthrough]]; | ||
| 490 | default: { | ||
| 491 | const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]), | ||
| 492 | GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])}; | ||
| 493 | glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); | ||
| 466 | break; | 494 | break; |
| 467 | default: | ||
| 468 | glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); | ||
| 469 | break; | ||
| 470 | } | 495 | } |
| 496 | } | ||
| 497 | return view.handle; | ||
| 471 | } | 498 | } |
| 472 | 499 | ||
| 473 | OGLTextureView CachedSurfaceView::CreateTextureView() const { | 500 | OGLTextureView CachedSurfaceView::CreateTextureView() const { |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 02d9981a1..8a2ac8603 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -83,7 +83,7 @@ public: | |||
| 83 | /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER | 83 | /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER |
| 84 | void Attach(GLenum attachment, GLenum target) const; | 84 | void Attach(GLenum attachment, GLenum target) const; |
| 85 | 85 | ||
| 86 | void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, | 86 | GLuint GetTexture(Tegra::Texture::SwizzleSource x_source, |
| 87 | Tegra::Texture::SwizzleSource y_source, | 87 | Tegra::Texture::SwizzleSource y_source, |
| 88 | Tegra::Texture::SwizzleSource z_source, | 88 | Tegra::Texture::SwizzleSource z_source, |
| 89 | Tegra::Texture::SwizzleSource w_source); | 89 | Tegra::Texture::SwizzleSource w_source); |
| @@ -98,7 +98,7 @@ public: | |||
| 98 | if (is_proxy) { | 98 | if (is_proxy) { |
| 99 | return surface.GetTexture(); | 99 | return surface.GetTexture(); |
| 100 | } | 100 | } |
| 101 | return texture_view.handle; | 101 | return main_view.handle; |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | GLenum GetFormat() const { | 104 | GLenum GetFormat() const { |
| @@ -110,23 +110,19 @@ public: | |||
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | private: | 112 | private: |
| 113 | u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, | ||
| 114 | Tegra::Texture::SwizzleSource y_source, | ||
| 115 | Tegra::Texture::SwizzleSource z_source, | ||
| 116 | Tegra::Texture::SwizzleSource w_source) const { | ||
| 117 | return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | | ||
| 118 | (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); | ||
| 119 | } | ||
| 120 | |||
| 121 | OGLTextureView CreateTextureView() const; | 113 | OGLTextureView CreateTextureView() const; |
| 122 | 114 | ||
| 123 | CachedSurface& surface; | 115 | CachedSurface& surface; |
| 124 | GLenum target{}; | 116 | const GLenum format; |
| 125 | GLenum format{}; | 117 | const GLenum target; |
| 118 | const bool is_proxy; | ||
| 119 | |||
| 120 | std::unordered_map<u32, OGLTextureView> view_cache; | ||
| 121 | OGLTextureView main_view; | ||
| 126 | 122 | ||
| 127 | OGLTextureView texture_view; | 123 | // Use an invalid default so it always fails the comparison test |
| 128 | u32 swizzle{}; | 124 | u32 current_swizzle = 0xffffffff; |
| 129 | bool is_proxy{}; | 125 | GLuint current_view = 0; |
| 130 | }; | 126 | }; |
| 131 | 127 | ||
| 132 | class TextureCacheOpenGL final : public TextureCacheBase { | 128 | class TextureCacheOpenGL final : public TextureCacheBase { |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 6b489e6db..e7952924a 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() { | |||
| 753 | bool RendererOpenGL::Init() { | 753 | bool RendererOpenGL::Init() { |
| 754 | if (GLAD_GL_KHR_debug) { | 754 | if (GLAD_GL_KHR_debug) { |
| 755 | glEnable(GL_DEBUG_OUTPUT); | 755 | glEnable(GL_DEBUG_OUTPUT); |
| 756 | if (Settings::values.renderer_debug) { | ||
| 757 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||
| 758 | } | ||
| 756 | glDebugMessageCallback(DebugHandler, nullptr); | 759 | glDebugMessageCallback(DebugHandler, nullptr); |
| 757 | } | 760 | } |
| 758 | 761 | ||
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 568744e3c..424278816 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp | |||
| @@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { | |||
| 71 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); | 71 | const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); |
| 72 | 72 | ||
| 73 | u32 packed_front_face = PackFrontFace(regs.front_face); | 73 | u32 packed_front_face = PackFrontFace(regs.front_face); |
| 74 | if (regs.screen_y_control.triangle_rast_flip != 0 && | 74 | if (regs.screen_y_control.triangle_rast_flip != 0) { |
| 75 | regs.viewport_transform[0].scale_y > 0.0f) { | ||
| 76 | // Flip front face | 75 | // Flip front face |
| 77 | packed_front_face = 1 - packed_front_face; | 76 | packed_front_face = 1 - packed_front_face; |
| 78 | } | 77 | } |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 12be691a5..2871035f5 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -142,7 +142,7 @@ struct FormatTuple { | |||
| 142 | {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 | 142 | {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 |
| 143 | {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 | 143 | {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 |
| 144 | {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 | 144 | {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 |
| 145 | {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 | 145 | {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8 |
| 146 | {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F | 146 | {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F |
| 147 | {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F | 147 | {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F |
| 148 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F | 148 | {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F |
| @@ -168,7 +168,7 @@ struct FormatTuple { | |||
| 168 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 | 168 | {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 |
| 169 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 | 169 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 |
| 170 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 | 170 | {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 |
| 171 | {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB | 171 | {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB |
| 172 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB | 172 | {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB |
| 173 | {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB | 173 | {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB |
| 174 | {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB | 174 | {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index f0c491d00..750e5a0ca 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -104,6 +104,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( | |||
| 104 | VK_FORMAT_R16_SFLOAT, | 104 | VK_FORMAT_R16_SFLOAT, |
| 105 | VK_FORMAT_R16G16B16A16_SFLOAT, | 105 | VK_FORMAT_R16G16B16A16_SFLOAT, |
| 106 | VK_FORMAT_B8G8R8A8_UNORM, | 106 | VK_FORMAT_B8G8R8A8_UNORM, |
| 107 | VK_FORMAT_B8G8R8A8_SRGB, | ||
| 107 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, | 108 | VK_FORMAT_R4G4B4A4_UNORM_PACK16, |
| 108 | VK_FORMAT_D32_SFLOAT, | 109 | VK_FORMAT_D32_SFLOAT, |
| 109 | VK_FORMAT_D16_UNORM, | 110 | VK_FORMAT_D16_UNORM, |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a5c7b7945..65a1c6245 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 312 | ASSERT(point_size != 0.0f); | 312 | ASSERT(point_size != 0.0f); |
| 313 | } | 313 | } |
| 314 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { | 314 | for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { |
| 315 | specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); | 315 | const auto& attribute = fixed_state.vertex_input.attributes[i]; |
| 316 | specialization.enabled_attributes[i] = attribute.enabled.Value() != 0; | ||
| 317 | specialization.attribute_types[i] = attribute.Type(); | ||
| 316 | } | 318 | } |
| 317 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; | 319 | specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; |
| 318 | 320 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index be5b77fae..a3d992ed3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex | |||
| 877 | 877 | ||
| 878 | for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { | 878 | for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { |
| 879 | const auto& attrib = regs.vertex_attrib_format[index]; | 879 | const auto& attrib = regs.vertex_attrib_format[index]; |
| 880 | if (!attrib.IsValid()) { | 880 | if (attrib.IsConstant()) { |
| 881 | vertex_input.SetAttribute(index, false, 0, 0, {}, {}); | 881 | vertex_input.SetAttribute(index, false, 0, 0, {}, {}); |
| 882 | continue; | 882 | continue; |
| 883 | } | 883 | } |
| 884 | |||
| 885 | [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; | ||
| 886 | ASSERT(buffer.IsEnabled()); | ||
| 887 | |||
| 888 | vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), | 884 | vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), |
| 889 | attrib.size.Value()); | 885 | attrib.size.Value()); |
| 890 | } | 886 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 890f34a2c..a13e8baa7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -741,8 +741,10 @@ private: | |||
| 741 | if (!IsGenericAttribute(index)) { | 741 | if (!IsGenericAttribute(index)) { |
| 742 | continue; | 742 | continue; |
| 743 | } | 743 | } |
| 744 | |||
| 745 | const u32 location = GetGenericAttributeLocation(index); | 744 | const u32 location = GetGenericAttributeLocation(index); |
| 745 | if (!IsAttributeEnabled(location)) { | ||
| 746 | continue; | ||
| 747 | } | ||
| 746 | const auto type_descriptor = GetAttributeType(location); | 748 | const auto type_descriptor = GetAttributeType(location); |
| 747 | Id type; | 749 | Id type; |
| 748 | if (IsInputAttributeArray()) { | 750 | if (IsInputAttributeArray()) { |
| @@ -986,6 +988,10 @@ private: | |||
| 986 | return stage == ShaderType::TesselationControl; | 988 | return stage == ShaderType::TesselationControl; |
| 987 | } | 989 | } |
| 988 | 990 | ||
| 991 | bool IsAttributeEnabled(u32 location) const { | ||
| 992 | return stage != ShaderType::Vertex || specialization.enabled_attributes[location]; | ||
| 993 | } | ||
| 994 | |||
| 989 | u32 GetNumInputVertices() const { | 995 | u32 GetNumInputVertices() const { |
| 990 | switch (stage) { | 996 | switch (stage) { |
| 991 | case ShaderType::Geometry: | 997 | case ShaderType::Geometry: |
| @@ -1201,16 +1207,20 @@ private: | |||
| 1201 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); | 1207 | UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); |
| 1202 | return {v_float_zero, Type::Float}; | 1208 | return {v_float_zero, Type::Float}; |
| 1203 | default: | 1209 | default: |
| 1204 | if (IsGenericAttribute(attribute)) { | 1210 | if (!IsGenericAttribute(attribute)) { |
| 1205 | const u32 location = GetGenericAttributeLocation(attribute); | 1211 | break; |
| 1206 | const auto type_descriptor = GetAttributeType(location); | ||
| 1207 | const Type type = type_descriptor.type; | ||
| 1208 | const Id attribute_id = input_attributes.at(attribute); | ||
| 1209 | const std::vector elements = {element}; | ||
| 1210 | const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); | ||
| 1211 | return {OpLoad(GetTypeDefinition(type), pointer), type}; | ||
| 1212 | } | 1212 | } |
| 1213 | break; | 1213 | const u32 location = GetGenericAttributeLocation(attribute); |
| 1214 | if (!IsAttributeEnabled(location)) { | ||
| 1215 | // Disabled attributes (also known as constant attributes) always return zero. | ||
| 1216 | return {v_float_zero, Type::Float}; | ||
| 1217 | } | ||
| 1218 | const auto type_descriptor = GetAttributeType(location); | ||
| 1219 | const Type type = type_descriptor.type; | ||
| 1220 | const Id attribute_id = input_attributes.at(attribute); | ||
| 1221 | const std::vector elements = {element}; | ||
| 1222 | const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements); | ||
| 1223 | return {OpLoad(GetTypeDefinition(type), pointer), type}; | ||
| 1214 | } | 1224 | } |
| 1215 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); | 1225 | UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); |
| 1216 | return {v_float_zero, Type::Float}; | 1226 | return {v_float_zero, Type::Float}; |
| @@ -2215,8 +2225,8 @@ private: | |||
| 2215 | return {}; | 2225 | return {}; |
| 2216 | } | 2226 | } |
| 2217 | 2227 | ||
| 2218 | Expression MemoryBarrierGL(Operation) { | 2228 | template <spv::Scope scope> |
| 2219 | const auto scope = spv::Scope::Device; | 2229 | Expression MemoryBarrier(Operation) { |
| 2220 | const auto semantics = | 2230 | const auto semantics = |
| 2221 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | | 2231 | spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | |
| 2222 | spv::MemorySemanticsMask::WorkgroupMemory | | 2232 | spv::MemorySemanticsMask::WorkgroupMemory | |
| @@ -2681,7 +2691,8 @@ private: | |||
| 2681 | &SPIRVDecompiler::ShuffleIndexed, | 2691 | &SPIRVDecompiler::ShuffleIndexed, |
| 2682 | 2692 | ||
| 2683 | &SPIRVDecompiler::Barrier, | 2693 | &SPIRVDecompiler::Barrier, |
| 2684 | &SPIRVDecompiler::MemoryBarrierGL, | 2694 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>, |
| 2695 | &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>, | ||
| 2685 | }; | 2696 | }; |
| 2686 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); | 2697 | static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); |
| 2687 | 2698 | ||
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index f4c05ac3c..b7af26388 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -88,7 +88,8 @@ struct Specialization final { | |||
| 88 | u32 shared_memory_size{}; | 88 | u32 shared_memory_size{}; |
| 89 | 89 | ||
| 90 | // Graphics specific | 90 | // Graphics specific |
| 91 | std::optional<float> point_size{}; | 91 | std::optional<float> point_size; |
| 92 | std::bitset<Maxwell::NumVertexAttributes> enabled_attributes; | ||
| 92 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; | 93 | std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; |
| 93 | bool ndc_minus_one_to_one{}; | 94 | bool ndc_minus_one_to_one{}; |
| 94 | }; | 95 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 55f43e61b..2f1d5021d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -354,26 +354,23 @@ CachedSurfaceView::~CachedSurfaceView() = default; | |||
| 354 | 354 | ||
| 355 | VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, | 355 | VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, |
| 356 | SwizzleSource z_source, SwizzleSource w_source) { | 356 | SwizzleSource z_source, SwizzleSource w_source) { |
| 357 | const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); | 357 | const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); |
| 358 | if (last_image_view && last_swizzle == swizzle) { | 358 | if (last_image_view && last_swizzle == new_swizzle) { |
| 359 | return last_image_view; | 359 | return last_image_view; |
| 360 | } | 360 | } |
| 361 | last_swizzle = swizzle; | 361 | last_swizzle = new_swizzle; |
| 362 | 362 | ||
| 363 | const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle); | 363 | const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle); |
| 364 | auto& image_view = entry->second; | 364 | auto& image_view = entry->second; |
| 365 | if (!is_cache_miss) { | 365 | if (!is_cache_miss) { |
| 366 | return last_image_view = *image_view; | 366 | return last_image_view = *image_view; |
| 367 | } | 367 | } |
| 368 | 368 | ||
| 369 | auto swizzle_x = MaxwellToVK::SwizzleSource(x_source); | 369 | std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source), |
| 370 | auto swizzle_y = MaxwellToVK::SwizzleSource(y_source); | 370 | MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)}; |
| 371 | auto swizzle_z = MaxwellToVK::SwizzleSource(z_source); | ||
| 372 | auto swizzle_w = MaxwellToVK::SwizzleSource(w_source); | ||
| 373 | |||
| 374 | if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { | 371 | if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { |
| 375 | // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. | 372 | // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. |
| 376 | std::swap(swizzle_x, swizzle_z); | 373 | std::swap(swizzle[0], swizzle[2]); |
| 377 | } | 374 | } |
| 378 | 375 | ||
| 379 | // Games can sample depth or stencil values on textures. This is decided by the swizzle value on | 376 | // Games can sample depth or stencil values on textures. This is decided by the swizzle value on |
| @@ -395,11 +392,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y | |||
| 395 | UNIMPLEMENTED(); | 392 | UNIMPLEMENTED(); |
| 396 | } | 393 | } |
| 397 | 394 | ||
| 398 | // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity | 395 | // Make sure we sample the first component |
| 399 | swizzle_x = VK_COMPONENT_SWIZZLE_R; | 396 | std::transform( |
| 400 | swizzle_y = VK_COMPONENT_SWIZZLE_G; | 397 | swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) { |
| 401 | swizzle_z = VK_COMPONENT_SWIZZLE_B; | 398 | return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component; |
| 402 | swizzle_w = VK_COMPONENT_SWIZZLE_A; | 399 | }); |
| 403 | } | 400 | } |
| 404 | 401 | ||
| 405 | VkImageViewCreateInfo ci; | 402 | VkImageViewCreateInfo ci; |
| @@ -409,7 +406,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y | |||
| 409 | ci.image = surface.GetImageHandle(); | 406 | ci.image = surface.GetImageHandle(); |
| 410 | ci.viewType = image_view_type; | 407 | ci.viewType = image_view_type; |
| 411 | ci.format = surface.GetImage().GetFormat(); | 408 | ci.format = surface.GetImage().GetFormat(); |
| 412 | ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; | 409 | ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]}; |
| 413 | ci.subresourceRange.aspectMask = aspect; | 410 | ci.subresourceRange.aspectMask = aspect; |
| 414 | ci.subresourceRange.baseMipLevel = base_level; | 411 | ci.subresourceRange.baseMipLevel = base_level; |
| 415 | ci.subresourceRange.levelCount = num_levels; | 412 | ci.subresourceRange.levelCount = num_levels; |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 694b325e1..d00e10913 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -299,9 +299,19 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 299 | break; | 299 | break; |
| 300 | } | 300 | } |
| 301 | case OpCode::Id::MEMBAR: { | 301 | case OpCode::Id::MEMBAR: { |
| 302 | UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL); | ||
| 303 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); | 302 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); |
| 304 | bb.push_back(Operation(OperationCode::MemoryBarrierGL)); | 303 | const OperationCode type = [instr] { |
| 304 | switch (instr.membar.type) { | ||
| 305 | case Tegra::Shader::MembarType::CTA: | ||
| 306 | return OperationCode::MemoryBarrierGroup; | ||
| 307 | case Tegra::Shader::MembarType::GL: | ||
| 308 | return OperationCode::MemoryBarrierGlobal; | ||
| 309 | default: | ||
| 310 | UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value())); | ||
| 311 | return OperationCode::MemoryBarrierGlobal; | ||
| 312 | } | ||
| 313 | }(); | ||
| 314 | bb.push_back(Operation(type)); | ||
| 305 | break; | 315 | break; |
| 306 | } | 316 | } |
| 307 | case OpCode::Id::DEPBAR: { | 317 | case OpCode::Id::DEPBAR: { |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c06512413..c5e5165ff 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -233,8 +233,9 @@ enum class OperationCode { | |||
| 233 | ThreadLtMask, /// () -> uint | 233 | ThreadLtMask, /// () -> uint |
| 234 | ShuffleIndexed, /// (uint value, uint index) -> uint | 234 | ShuffleIndexed, /// (uint value, uint index) -> uint |
| 235 | 235 | ||
| 236 | Barrier, /// () -> void | 236 | Barrier, /// () -> void |
| 237 | MemoryBarrierGL, /// () -> void | 237 | MemoryBarrierGroup, /// () -> void |
| 238 | MemoryBarrierGlobal, /// () -> void | ||
| 238 | 239 | ||
| 239 | Amount, | 240 | Amount, |
| 240 | }; | 241 | }; |
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7032e0059..f476f03b0 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -41,7 +41,7 @@ struct Table { | |||
| 41 | ComponentType alpha_component; | 41 | ComponentType alpha_component; |
| 42 | bool is_srgb; | 42 | bool is_srgb; |
| 43 | }; | 43 | }; |
| 44 | constexpr std::array<Table, 77> DefinitionTable = {{ | 44 | constexpr std::array<Table, 78> DefinitionTable = {{ |
| 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, | 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, |
| 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, | 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, |
| 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, | 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, |
| @@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{ | |||
| 98 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, | 98 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, |
| 99 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, | 99 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, |
| 100 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, | 100 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, |
| 101 | {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, | ||
| 101 | {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, | 102 | {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, |
| 102 | 103 | ||
| 103 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, | 104 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d6efc34b2..45e3ddd2c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <unordered_map> | 14 | #include <unordered_map> |
| 15 | #include <vector> | 15 | #include <vector> |
| 16 | 16 | ||
| 17 | #include <boost/container/small_vector.hpp> | ||
| 17 | #include <boost/icl/interval_map.hpp> | 18 | #include <boost/icl/interval_map.hpp> |
| 18 | #include <boost/range/iterator_range.hpp> | 19 | #include <boost/range/iterator_range.hpp> |
| 19 | 20 | ||
| @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | |||
| 53 | 54 | ||
| 54 | template <typename TSurface, typename TView> | 55 | template <typename TSurface, typename TView> |
| 55 | class TextureCache { | 56 | class TextureCache { |
| 57 | using VectorSurface = boost::container::small_vector<TSurface, 1>; | ||
| 56 | 58 | ||
| 57 | public: | 59 | public: |
| 58 | void InvalidateRegion(VAddr addr, std::size_t size) { | 60 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| @@ -308,18 +310,20 @@ public: | |||
| 308 | dst_surface.first->MarkAsModified(true, Tick()); | 310 | dst_surface.first->MarkAsModified(true, Tick()); |
| 309 | } | 311 | } |
| 310 | 312 | ||
| 311 | TSurface TryFindFramebufferSurface(VAddr addr) { | 313 | TSurface TryFindFramebufferSurface(VAddr addr) const { |
| 312 | if (!addr) { | 314 | if (!addr) { |
| 313 | return nullptr; | 315 | return nullptr; |
| 314 | } | 316 | } |
| 315 | const VAddr page = addr >> registry_page_bits; | 317 | const VAddr page = addr >> registry_page_bits; |
| 316 | std::vector<TSurface>& list = registry[page]; | 318 | const auto it = registry.find(page); |
| 317 | for (auto& surface : list) { | 319 | if (it == registry.end()) { |
| 318 | if (surface->GetCpuAddr() == addr) { | 320 | return nullptr; |
| 319 | return surface; | ||
| 320 | } | ||
| 321 | } | 321 | } |
| 322 | return nullptr; | 322 | const auto& list = it->second; |
| 323 | const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { | ||
| 324 | return surface->GetCpuAddr() == addr; | ||
| 325 | }); | ||
| 326 | return found != list.end() ? *found : nullptr; | ||
| 323 | } | 327 | } |
| 324 | 328 | ||
| 325 | u64 Tick() { | 329 | u64 Tick() { |
| @@ -498,7 +502,7 @@ private: | |||
| 498 | * @param untopological Indicates to the recycler that the texture has no way | 502 | * @param untopological Indicates to the recycler that the texture has no way |
| 499 | * to match the overlaps due to topological reasons. | 503 | * to match the overlaps due to topological reasons. |
| 500 | **/ | 504 | **/ |
| 501 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | 505 | RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, |
| 502 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | 506 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { |
| 503 | if (Settings::IsGPULevelExtreme()) { | 507 | if (Settings::IsGPULevelExtreme()) { |
| 504 | return RecycleStrategy::Flush; | 508 | return RecycleStrategy::Flush; |
| @@ -538,9 +542,8 @@ private: | |||
| 538 | * @param untopological Indicates to the recycler that the texture has no way to match the | 542 | * @param untopological Indicates to the recycler that the texture has no way to match the |
| 539 | * overlaps due to topological reasons. | 543 | * overlaps due to topological reasons. |
| 540 | **/ | 544 | **/ |
| 541 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, | 545 | std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, |
| 542 | const SurfaceParams& params, const GPUVAddr gpu_addr, | 546 | const GPUVAddr gpu_addr, const bool preserve_contents, |
| 543 | const bool preserve_contents, | ||
| 544 | const MatchTopologyResult untopological) { | 547 | const MatchTopologyResult untopological) { |
| 545 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); | 548 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); |
| 546 | for (auto& surface : overlaps) { | 549 | for (auto& surface : overlaps) { |
| @@ -650,7 +653,7 @@ private: | |||
| 650 | * @param params The parameters on the new surface. | 653 | * @param params The parameters on the new surface. |
| 651 | * @param gpu_addr The starting address of the new surface. | 654 | * @param gpu_addr The starting address of the new surface. |
| 652 | **/ | 655 | **/ |
| 653 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, | 656 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, |
| 654 | const SurfaceParams& params, | 657 | const SurfaceParams& params, |
| 655 | const GPUVAddr gpu_addr) { | 658 | const GPUVAddr gpu_addr) { |
| 656 | if (params.target == SurfaceTarget::Texture3D) { | 659 | if (params.target == SurfaceTarget::Texture3D) { |
| @@ -708,7 +711,7 @@ private: | |||
| 708 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | 711 | * @param preserve_contents Indicates that the new surface should be loaded from memory or |
| 709 | * left blank. | 712 | * left blank. |
| 710 | */ | 713 | */ |
| 711 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | 714 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, |
| 712 | const SurfaceParams& params, | 715 | const SurfaceParams& params, |
| 713 | const GPUVAddr gpu_addr, | 716 | const GPUVAddr gpu_addr, |
| 714 | const VAddr cpu_addr, | 717 | const VAddr cpu_addr, |
| @@ -810,7 +813,7 @@ private: | |||
| 810 | TSurface& current_surface = iter->second; | 813 | TSurface& current_surface = iter->second; |
| 811 | const auto topological_result = current_surface->MatchesTopology(params); | 814 | const auto topological_result = current_surface->MatchesTopology(params); |
| 812 | if (topological_result != MatchTopologyResult::FullMatch) { | 815 | if (topological_result != MatchTopologyResult::FullMatch) { |
| 813 | std::vector<TSurface> overlaps{current_surface}; | 816 | VectorSurface overlaps{current_surface}; |
| 814 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 817 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, |
| 815 | topological_result); | 818 | topological_result); |
| 816 | } | 819 | } |
| @@ -991,7 +994,9 @@ private: | |||
| 991 | params.target = target; | 994 | params.target = target; |
| 992 | params.is_tiled = false; | 995 | params.is_tiled = false; |
| 993 | params.srgb_conversion = false; | 996 | params.srgb_conversion = false; |
| 994 | params.is_layered = false; | 997 | params.is_layered = |
| 998 | target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray || | ||
| 999 | target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray; | ||
| 995 | params.block_width = 0; | 1000 | params.block_width = 0; |
| 996 | params.block_height = 0; | 1001 | params.block_height = 0; |
| 997 | params.block_depth = 0; | 1002 | params.block_depth = 0; |
| @@ -1124,23 +1129,25 @@ private: | |||
| 1124 | } | 1129 | } |
| 1125 | } | 1130 | } |
| 1126 | 1131 | ||
| 1127 | std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | 1132 | VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { |
| 1128 | if (size == 0) { | 1133 | if (size == 0) { |
| 1129 | return {}; | 1134 | return {}; |
| 1130 | } | 1135 | } |
| 1131 | const VAddr cpu_addr_end = cpu_addr + size; | 1136 | const VAddr cpu_addr_end = cpu_addr + size; |
| 1132 | VAddr start = cpu_addr >> registry_page_bits; | ||
| 1133 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | 1137 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; |
| 1134 | std::vector<TSurface> surfaces; | 1138 | VectorSurface surfaces; |
| 1135 | while (start <= end) { | 1139 | for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { |
| 1136 | std::vector<TSurface>& list = registry[start]; | 1140 | const auto it = registry.find(start); |
| 1137 | for (auto& surface : list) { | 1141 | if (it == registry.end()) { |
| 1138 | if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { | 1142 | continue; |
| 1139 | surface->MarkAsPicked(true); | 1143 | } |
| 1140 | surfaces.push_back(surface); | 1144 | for (auto& surface : it->second) { |
| 1145 | if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { | ||
| 1146 | continue; | ||
| 1141 | } | 1147 | } |
| 1148 | surface->MarkAsPicked(true); | ||
| 1149 | surfaces.push_back(surface); | ||
| 1142 | } | 1150 | } |
| 1143 | start++; | ||
| 1144 | } | 1151 | } |
| 1145 | for (auto& surface : surfaces) { | 1152 | for (auto& surface : surfaces) { |
| 1146 | surface->MarkAsPicked(false); | 1153 | surface->MarkAsPicked(false); |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 1adf8932b..1f5e43043 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -106,6 +106,9 @@ public: | |||
| 106 | format.setVersion(4, 3); | 106 | format.setVersion(4, 3); |
| 107 | format.setProfile(QSurfaceFormat::CompatibilityProfile); | 107 | format.setProfile(QSurfaceFormat::CompatibilityProfile); |
| 108 | format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); | 108 | format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); |
| 109 | if (Settings::values.renderer_debug) { | ||
| 110 | format.setOption(QSurfaceFormat::FormatOption::DebugContext); | ||
| 111 | } | ||
| 109 | // TODO: expose a setting for buffer value (ie default/single/double/triple) | 112 | // TODO: expose a setting for buffer value (ie default/single/double/triple) |
| 110 | format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); | 113 | format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); |
| 111 | format.setSwapInterval(0); | 114 | format.setSwapInterval(0); |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 0b291c7d0..270cccc77 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -65,6 +65,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual | |||
| 65 | #include "common/logging/backend.h" | 65 | #include "common/logging/backend.h" |
| 66 | #include "common/logging/filter.h" | 66 | #include "common/logging/filter.h" |
| 67 | #include "common/logging/log.h" | 67 | #include "common/logging/log.h" |
| 68 | #include "common/memory_detect.h" | ||
| 68 | #include "common/microprofile.h" | 69 | #include "common/microprofile.h" |
| 69 | #include "common/scm_rev.h" | 70 | #include "common/scm_rev.h" |
| 70 | #include "common/scope_exit.h" | 71 | #include "common/scope_exit.h" |
| @@ -219,6 +220,10 @@ GMainWindow::GMainWindow() | |||
| 219 | LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); | 220 | LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); |
| 220 | #endif | 221 | #endif |
| 221 | LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); | 222 | LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); |
| 223 | LOG_INFO(Frontend, "Host RAM: {:.2f} GB", | ||
| 224 | Common::GetMemInfo().TotalPhysicalMemory / 1024.0f / 1024 / 1024); | ||
| 225 | LOG_INFO(Frontend, "Host Swap: {:.2f} GB", | ||
| 226 | Common::GetMemInfo().TotalSwapMemory / 1024.0f / 1024 / 1024); | ||
| 222 | UpdateWindowTitle(); | 227 | UpdateWindowTitle(); |
| 223 | 228 | ||
| 224 | show(); | 229 | show(); |
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp index 411e7e647..09cc0a3b5 100644 --- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp +++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp | |||
| @@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen) | |||
| 98 | SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); | 98 | SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); |
| 99 | SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); | 99 | SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); |
| 100 | SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); | 100 | SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); |
| 101 | if (Settings::values.renderer_debug) { | ||
| 102 | SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG); | ||
| 103 | } | ||
| 101 | SDL_GL_SetSwapInterval(0); | 104 | SDL_GL_SetSwapInterval(0); |
| 102 | 105 | ||
| 103 | std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, | 106 | std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, |