summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/CMakeLists.txt6
-rw-r--r--src/common/memory_detect.cpp60
-rw-r--r--src/common/memory_detect.h22
-rw-r--r--src/common/x64/xbyak_abi.h266
-rw-r--r--src/common/x64/xbyak_util.h47
-rw-r--r--src/core/file_sys/patch_manager.cpp34
-rw-r--r--src/core/file_sys/patch_manager.h5
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_device.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp50
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp71
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h26
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp3
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp37
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp29
-rw-r--r--src/video_core/shader/decode/other.cpp14
-rw-r--r--src/video_core/shader/node.h5
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/texture_cache.h57
-rw-r--r--src/yuzu/bootmanager.cpp3
-rw-r--r--src/yuzu/main.cpp5
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp3
32 files changed, 648 insertions, 157 deletions
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e6769a5f3..24b7a083c 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -123,6 +123,8 @@ add_library(common STATIC
123 lz4_compression.cpp 123 lz4_compression.cpp
124 lz4_compression.h 124 lz4_compression.h
125 math_util.h 125 math_util.h
126 memory_detect.cpp
127 memory_detect.h
126 memory_hook.cpp 128 memory_hook.cpp
127 memory_hook.h 129 memory_hook.h
128 microprofile.cpp 130 microprofile.cpp
@@ -169,10 +171,12 @@ if(ARCHITECTURE_x86_64)
169 PRIVATE 171 PRIVATE
170 x64/cpu_detect.cpp 172 x64/cpu_detect.cpp
171 x64/cpu_detect.h 173 x64/cpu_detect.h
174 x64/xbyak_abi.h
175 x64/xbyak_util.h
172 ) 176 )
173endif() 177endif()
174 178
175create_target_directory_groups(common) 179create_target_directory_groups(common)
176 180
177target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) 181target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile)
178target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd) 182target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp
new file mode 100644
index 000000000..3fdc309a2
--- /dev/null
+++ b/src/common/memory_detect.cpp
@@ -0,0 +1,60 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#ifdef _WIN32
6// clang-format off
7#include <windows.h>
8#include <sysinfoapi.h>
9// clang-format on
10#else
11#include <sys/types.h>
12#ifdef __APPLE__
13#include <sys/sysctl.h>
14#else
15#include <sys/sysinfo.h>
16#endif
17#endif
18
19#include "common/memory_detect.h"
20
21namespace Common {
22
23// Detects the RAM and Swapfile sizes
24static MemoryInfo Detect() {
25 MemoryInfo mem_info{};
26
27#ifdef _WIN32
28 MEMORYSTATUSEX memorystatus;
29 memorystatus.dwLength = sizeof(memorystatus);
30 GlobalMemoryStatusEx(&memorystatus);
31 mem_info.TotalPhysicalMemory = memorystatus.ullTotalPhys;
32 mem_info.TotalSwapMemory = memorystatus.ullTotalPageFile - mem_info.TotalPhysicalMemory;
33#elif defined(__APPLE__)
34 u64 ramsize;
35 struct xsw_usage vmusage;
36 std::size_t sizeof_ramsize = sizeof(ramsize);
37 std::size_t sizeof_vmusage = sizeof(vmusage);
38 // hw and vm are defined in sysctl.h
39 // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471
40 // sysctlbyname(const char *, void *, size_t *, void *, size_t);
41 sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0);
42 sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0);
43 mem_info.TotalPhysicalMemory = ramsize;
44 mem_info.TotalSwapMemory = vmusage.xsu_total;
45#else
46 struct sysinfo meminfo;
47 sysinfo(&meminfo);
48 mem_info.TotalPhysicalMemory = meminfo.totalram;
49 mem_info.TotalSwapMemory = meminfo.totalswap;
50#endif
51
52 return mem_info;
53}
54
55const MemoryInfo& GetMemInfo() {
56 static MemoryInfo mem_info = Detect();
57 return mem_info;
58}
59
60} // namespace Common \ No newline at end of file
diff --git a/src/common/memory_detect.h b/src/common/memory_detect.h
new file mode 100644
index 000000000..a73c0f3f4
--- /dev/null
+++ b/src/common/memory_detect.h
@@ -0,0 +1,22 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Common {
10
11struct MemoryInfo {
12 u64 TotalPhysicalMemory{};
13 u64 TotalSwapMemory{};
14};
15
16/**
17 * Gets the memory info of the host system
18 * @return Reference to a MemoryInfo struct with the physical and swap memory sizes in bytes
19 */
20const MemoryInfo& GetMemInfo();
21
22} // namespace Common \ No newline at end of file
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..794da8a52
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,266 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8#include <initializer_list>
9#include <xbyak.h>
10#include "common/assert.h"
11
12namespace Common::X64 {
13
14inline int RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers.");
18 ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20}
21
22inline Xbyak::Reg64 IndexToReg64(int reg_index) {
23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(reg_index);
25}
26
27inline Xbyak::Xmm IndexToXmm(int reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(reg_index - 16);
30}
31
32inline Xbyak::Reg IndexToReg(int reg_index) {
33 if (reg_index < 16) {
34 return IndexToReg64(reg_index);
35 } else {
36 return IndexToXmm(reg_index);
37 }
38}
39
40inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
41 std::bitset<32> bits;
42 for (const Xbyak::Reg& reg : regs) {
43 bits[RegToIndex(reg)] = true;
44 }
45 return bits;
46}
47
// Mask selecting the low 16 bits of a register set (the general-purpose registers).
const std::bitset<32> ABI_ALL_GPRS{0x0000FFFFULL};
// Mask selecting the high 16 bits of a register set (the XMM registers).
const std::bitset<32> ABI_ALL_XMMS{0xFFFF0000ULL};
50
51#ifdef _WIN32
52
53// Microsoft x64 ABI
54const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
55const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
56const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
57const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
58const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
59
60const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
61 // GPRs
62 Xbyak::util::rcx,
63 Xbyak::util::rdx,
64 Xbyak::util::r8,
65 Xbyak::util::r9,
66 Xbyak::util::r10,
67 Xbyak::util::r11,
68 // XMMs
69 Xbyak::util::xmm0,
70 Xbyak::util::xmm1,
71 Xbyak::util::xmm2,
72 Xbyak::util::xmm3,
73 Xbyak::util::xmm4,
74 Xbyak::util::xmm5,
75});
76
77const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
78 // GPRs
79 Xbyak::util::rbx,
80 Xbyak::util::rsi,
81 Xbyak::util::rdi,
82 Xbyak::util::rbp,
83 Xbyak::util::r12,
84 Xbyak::util::r13,
85 Xbyak::util::r14,
86 Xbyak::util::r15,
87 // XMMs
88 Xbyak::util::xmm6,
89 Xbyak::util::xmm7,
90 Xbyak::util::xmm8,
91 Xbyak::util::xmm9,
92 Xbyak::util::xmm10,
93 Xbyak::util::xmm11,
94 Xbyak::util::xmm12,
95 Xbyak::util::xmm13,
96 Xbyak::util::xmm14,
97 Xbyak::util::xmm15,
98});
99
100constexpr size_t ABI_SHADOW_SPACE = 0x20;
101
102#else
103
104// System V x86-64 ABI
105const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
106const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
107const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
108const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
109const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
110
111const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
112 // GPRs
113 Xbyak::util::rcx,
114 Xbyak::util::rdx,
115 Xbyak::util::rdi,
116 Xbyak::util::rsi,
117 Xbyak::util::r8,
118 Xbyak::util::r9,
119 Xbyak::util::r10,
120 Xbyak::util::r11,
121 // XMMs
122 Xbyak::util::xmm0,
123 Xbyak::util::xmm1,
124 Xbyak::util::xmm2,
125 Xbyak::util::xmm3,
126 Xbyak::util::xmm4,
127 Xbyak::util::xmm5,
128 Xbyak::util::xmm6,
129 Xbyak::util::xmm7,
130 Xbyak::util::xmm8,
131 Xbyak::util::xmm9,
132 Xbyak::util::xmm10,
133 Xbyak::util::xmm11,
134 Xbyak::util::xmm12,
135 Xbyak::util::xmm13,
136 Xbyak::util::xmm14,
137 Xbyak::util::xmm15,
138});
139
140const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
141 // GPRs
142 Xbyak::util::rbx,
143 Xbyak::util::rbp,
144 Xbyak::util::r12,
145 Xbyak::util::r13,
146 Xbyak::util::r14,
147 Xbyak::util::r15,
148});
149
150constexpr size_t ABI_SHADOW_SPACE = 0;
151
152#endif
153
154inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
155 size_t needed_frame_size, s32* out_subtraction,
156 s32* out_xmm_offset) {
157 const auto count = (regs & ABI_ALL_GPRS).count();
158 rsp_alignment -= count * 8;
159 size_t subtraction = 0;
160 const auto xmm_count = (regs & ABI_ALL_XMMS).count();
161 if (xmm_count) {
162 // If we have any XMMs to save, we must align the stack here.
163 subtraction = rsp_alignment & 0xF;
164 }
165 subtraction += 0x10 * xmm_count;
166 size_t xmm_base_subtraction = subtraction;
167 subtraction += needed_frame_size;
168 subtraction += ABI_SHADOW_SPACE;
169 // Final alignment.
170 rsp_alignment -= subtraction;
171 subtraction += rsp_alignment & 0xF;
172
173 *out_subtraction = (s32)subtraction;
174 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
175}
176
177inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
178 size_t rsp_alignment, size_t needed_frame_size = 0) {
179 s32 subtraction, xmm_offset;
180 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
181 for (std::size_t i = 0; i < regs.size(); ++i) {
182 if (regs[i] && ABI_ALL_GPRS[i]) {
183 code.push(IndexToReg64(static_cast<int>(i)));
184 }
185 }
186 if (subtraction != 0) {
187 code.sub(code.rsp, subtraction);
188 }
189
190 for (int i = 0; i < regs.count(); i++) {
191 if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
192 code.push(IndexToReg64(i));
193 }
194 }
195
196 for (std::size_t i = 0; i < regs.size(); ++i) {
197 if (regs[i] && ABI_ALL_XMMS[i]) {
198 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
199 xmm_offset += 0x10;
200 }
201 }
202
203 return ABI_SHADOW_SPACE;
204}
205
206inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
207 size_t rsp_alignment, size_t needed_frame_size = 0) {
208 s32 subtraction, xmm_offset;
209 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
210
211 for (std::size_t i = 0; i < regs.size(); ++i) {
212 if (regs[i] && ABI_ALL_XMMS[i]) {
213 code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
214 xmm_offset += 0x10;
215 }
216 }
217
218 if (subtraction != 0) {
219 code.add(code.rsp, subtraction);
220 }
221
222 // GPRs need to be popped in reverse order
223 for (int i = 15; i >= 0; i--) {
224 if (regs[i]) {
225 code.pop(IndexToReg64(i));
226 }
227 }
228}
229
230inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
231 size_t rsp_alignment,
232 size_t needed_frame_size = 0) {
233 s32 subtraction, xmm_offset;
234 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
235
236 for (std::size_t i = 0; i < regs.size(); ++i) {
237 if (regs[i] && ABI_ALL_GPRS[i]) {
238 code.push(IndexToReg64(static_cast<int>(i)));
239 }
240 }
241
242 if (subtraction != 0) {
243 code.sub(code.rsp, subtraction);
244 }
245
246 return ABI_SHADOW_SPACE;
247}
248
249inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
250 size_t rsp_alignment, size_t needed_frame_size = 0) {
251 s32 subtraction, xmm_offset;
252 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
253
254 if (subtraction != 0) {
255 code.add(code.rsp, subtraction);
256 }
257
258 // GPRs need to be popped in reverse order
259 for (int i = 15; i >= 0; i--) {
260 if (regs[i]) {
261 code.pop(IndexToReg64(i));
262 }
263 }
264}
265
266} // namespace Common::X64
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..df17f8cbe
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,47 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <xbyak.h>
9#include "common/x64/xbyak_abi.h"
10
11namespace Common::X64 {
12
// Comparison predicate immediates for use with cmpps/cmpss.
enum {
    CMP_EQ = 0,    // equal
    CMP_LT = 1,    // less than
    CMP_LE = 2,    // less than or equal
    CMP_UNORD = 3, // unordered
    CMP_NEQ = 4,   // not equal
    CMP_NLT = 5,   // not less than
    CMP_NLE = 6,   // not less than or equal
    CMP_ORD = 7,   // ordered
};
24
25constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
26 const u64 distance = target - (ref + 5);
27 return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
28}
29
30inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
31 return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
32}
33
34template <typename T>
35inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
36 static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
37 size_t addr = reinterpret_cast<size_t>(f);
38 if (IsWithin2G(code, addr)) {
39 code.call(f);
40 } else {
41 // ABI_RETURN is a safe temp register to use before a call
42 code.mov(ABI_RETURN, addr);
43 code.call(ABI_RETURN);
44 }
45}
46
47} // namespace Common::X64
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index b93aa6935..c47ff863e 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -10,6 +10,7 @@
10#include "common/file_util.h" 10#include "common/file_util.h"
11#include "common/hex_util.h" 11#include "common/hex_util.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/string_util.h"
13#include "core/core.h" 14#include "core/core.h"
14#include "core/file_sys/content_archive.h" 15#include "core/file_sys/content_archive.h"
15#include "core/file_sys/control_metadata.h" 16#include "core/file_sys/control_metadata.h"
@@ -48,6 +49,23 @@ std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
48 return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]); 49 return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]);
49} 50}
50 51
52std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
53 std::string_view name) {
54#ifdef _WIN32
55 return dir->GetSubdirectory(name);
56#else
57 const auto subdirs = dir->GetSubdirectories();
58 for (const auto& subdir : subdirs) {
59 std::string dir_name = Common::ToLower(subdir->GetName());
60 if (dir_name == name) {
61 return subdir;
62 }
63 }
64
65 return nullptr;
66#endif
67}
68
51PatchManager::PatchManager(u64 title_id) : title_id(title_id) {} 69PatchManager::PatchManager(u64 title_id) : title_id(title_id) {}
52 70
53PatchManager::~PatchManager() = default; 71PatchManager::~PatchManager() = default;
@@ -104,7 +122,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
104 if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end()) 122 if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end())
105 continue; 123 continue;
106 124
107 auto exefs_dir = subdir->GetSubdirectory("exefs"); 125 auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
108 if (exefs_dir != nullptr) 126 if (exefs_dir != nullptr)
109 layers.push_back(std::move(exefs_dir)); 127 layers.push_back(std::move(exefs_dir));
110 } 128 }
@@ -130,7 +148,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
130 if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend()) 148 if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend())
131 continue; 149 continue;
132 150
133 auto exefs_dir = subdir->GetSubdirectory("exefs"); 151 auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
134 if (exefs_dir != nullptr) { 152 if (exefs_dir != nullptr) {
135 for (const auto& file : exefs_dir->GetFiles()) { 153 for (const auto& file : exefs_dir->GetFiles()) {
136 if (file->GetExtension() == "ips") { 154 if (file->GetExtension() == "ips") {
@@ -295,7 +313,7 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
295 continue; 313 continue;
296 } 314 }
297 315
298 auto cheats_dir = subdir->GetSubdirectory("cheats"); 316 auto cheats_dir = FindSubdirectoryCaseless(subdir, "cheats");
299 if (cheats_dir != nullptr) { 317 if (cheats_dir != nullptr) {
300 auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true); 318 auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
301 if (res.has_value()) { 319 if (res.has_value()) {
@@ -340,11 +358,11 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
340 continue; 358 continue;
341 } 359 }
342 360
343 auto romfs_dir = subdir->GetSubdirectory("romfs"); 361 auto romfs_dir = FindSubdirectoryCaseless(subdir, "romfs");
344 if (romfs_dir != nullptr) 362 if (romfs_dir != nullptr)
345 layers.push_back(std::move(romfs_dir)); 363 layers.push_back(std::move(romfs_dir));
346 364
347 auto ext_dir = subdir->GetSubdirectory("romfs_ext"); 365 auto ext_dir = FindSubdirectoryCaseless(subdir, "romfs_ext");
348 if (ext_dir != nullptr) 366 if (ext_dir != nullptr)
349 layers_ext.push_back(std::move(ext_dir)); 367 layers_ext.push_back(std::move(ext_dir));
350 } 368 }
@@ -470,7 +488,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
470 for (const auto& mod : mod_dir->GetSubdirectories()) { 488 for (const auto& mod : mod_dir->GetSubdirectories()) {
471 std::string types; 489 std::string types;
472 490
473 const auto exefs_dir = mod->GetSubdirectory("exefs"); 491 const auto exefs_dir = FindSubdirectoryCaseless(mod, "exefs");
474 if (IsDirValidAndNonEmpty(exefs_dir)) { 492 if (IsDirValidAndNonEmpty(exefs_dir)) {
475 bool ips = false; 493 bool ips = false;
476 bool ipswitch = false; 494 bool ipswitch = false;
@@ -494,9 +512,9 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
494 if (layeredfs) 512 if (layeredfs)
495 AppendCommaIfNotEmpty(types, "LayeredExeFS"); 513 AppendCommaIfNotEmpty(types, "LayeredExeFS");
496 } 514 }
497 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs"))) 515 if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "romfs")))
498 AppendCommaIfNotEmpty(types, "LayeredFS"); 516 AppendCommaIfNotEmpty(types, "LayeredFS");
499 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats"))) 517 if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "cheats")))
500 AppendCommaIfNotEmpty(types, "Cheats"); 518 AppendCommaIfNotEmpty(types, "Cheats");
501 519
502 if (types.empty()) 520 if (types.empty())
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index ec6db524d..f4cb918dd 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -29,6 +29,11 @@ enum class TitleVersionFormat : u8 {
29std::string FormatTitleVersion(u32 version, 29std::string FormatTitleVersion(u32 version,
30 TitleVersionFormat format = TitleVersionFormat::ThreeElements); 30 TitleVersionFormat format = TitleVersionFormat::ThreeElements);
31 31
32// Returns a directory with name matching name case-insensitive. Returns nullptr if directory
33// doesn't have a directory with name.
34std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
35 std::string_view name);
36
32// A centralized class to manage patches to games. 37// A centralized class to manage patches to games.
33class PatchManager { 38class PatchManager {
34public: 39public:
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f00c71dae..d6ee82836 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -229,7 +229,7 @@ endif()
229create_target_directory_groups(video_core) 229create_target_directory_groups(video_core)
230 230
231target_link_libraries(video_core PUBLIC common core) 231target_link_libraries(video_core PUBLIC common core)
232target_link_libraries(video_core PRIVATE glad) 232target_link_libraries(video_core PRIVATE glad xbyak)
233 233
234if (ENABLE_VULKAN) 234if (ENABLE_VULKAN)
235 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) 235 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b5a70b9fc..13ef2e42d 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -461,8 +461,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
461 461
462void Maxwell3D::ProcessQueryGet() { 462void Maxwell3D::ProcessQueryGet() {
463 // TODO(Subv): Support the other query units. 463 // TODO(Subv): Support the other query units.
464 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 464 if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
465 "Units other than CROP are unimplemented"); 465 LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
466 }
466 467
467 switch (regs.query.query_get.operation) { 468 switch (regs.query.query_get.operation) {
468 case Regs::QueryOperation::Release: 469 case Regs::QueryOperation::Release:
@@ -538,8 +539,8 @@ void Maxwell3D::ProcessCounterReset() {
538 rasterizer.ResetCounter(QueryType::SamplesPassed); 539 rasterizer.ResetCounter(QueryType::SamplesPassed);
539 break; 540 break;
540 default: 541 default:
541 LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", 542 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
542 static_cast<int>(regs.counter_reset)); 543 static_cast<int>(regs.counter_reset));
543 break; 544 break;
544 } 545 }
545} 546}
@@ -596,8 +597,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
596 system.GPU().GetTicks()); 597 system.GPU().GetTicks());
597 return {}; 598 return {};
598 default: 599 default:
599 UNIMPLEMENTED_MSG("Unimplemented query select type {}", 600 LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
600 static_cast<u32>(regs.query.query_get.select.Value())); 601 static_cast<u32>(regs.query.query_get.select.Value()));
601 return 1; 602 return 1;
602 } 603 }
603} 604}
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 466a911db..e1b245288 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -166,8 +166,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
166 166
167 const bool is_nvidia = vendor == "NVIDIA Corporation"; 167 const bool is_nvidia = vendor == "NVIDIA Corporation";
168 const bool is_amd = vendor == "ATI Technologies Inc."; 168 const bool is_amd = vendor == "ATI Technologies Inc.";
169 const bool is_intel = vendor == "Intel";
170 const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
171 169
172 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 170 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
173 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 171 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -182,7 +180,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
182 has_variable_aoffi = TestVariableAoffi(); 180 has_variable_aoffi = TestVariableAoffi();
183 has_component_indexing_bug = is_amd; 181 has_component_indexing_bug = is_amd;
184 has_precise_bug = TestPreciseBug(); 182 has_precise_bug = TestPreciseBug();
185 has_broken_compute = is_intel_proprietary;
186 has_fast_buffer_sub_data = is_nvidia; 183 has_fast_buffer_sub_data = is_nvidia;
187 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && 184 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
188 GLAD_GL_NV_compute_program5; 185 GLAD_GL_NV_compute_program5;
@@ -206,7 +203,6 @@ Device::Device(std::nullptr_t) {
206 has_image_load_formatted = true; 203 has_image_load_formatted = true;
207 has_variable_aoffi = true; 204 has_variable_aoffi = true;
208 has_component_indexing_bug = false; 205 has_component_indexing_bug = false;
209 has_broken_compute = false;
210 has_precise_bug = false; 206 has_precise_bug = false;
211} 207}
212 208
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e915dbd86..683ed9002 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -80,10 +80,6 @@ public:
80 return has_precise_bug; 80 return has_precise_bug;
81 } 81 }
82 82
83 bool HasBrokenCompute() const {
84 return has_broken_compute;
85 }
86
87 bool HasFastBufferSubData() const { 83 bool HasFastBufferSubData() const {
88 return has_fast_buffer_sub_data; 84 return has_fast_buffer_sub_data;
89 } 85 }
@@ -109,7 +105,6 @@ private:
109 bool has_variable_aoffi{}; 105 bool has_variable_aoffi{};
110 bool has_component_indexing_bug{}; 106 bool has_component_indexing_bug{};
111 bool has_precise_bug{}; 107 bool has_precise_bug{};
112 bool has_broken_compute{};
113 bool has_fast_buffer_sub_data{}; 108 bool has_fast_buffer_sub_data{};
114 bool use_assembly_shaders{}; 109 bool use_assembly_shaders{};
115}; 110};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8116a5daa..3c421dd16 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -655,10 +655,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
655} 655}
656 656
657void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 657void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
658 if (device.HasBrokenCompute()) {
659 return;
660 }
661
662 buffer_cache.Acquire(); 658 buffer_cache.Acquire();
663 current_cbuf = 0; 659 current_cbuf = 0;
664 660
@@ -977,16 +973,12 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
977 glBindTextureUnit(binding, 0); 973 glBindTextureUnit(binding, 0);
978 return; 974 return;
979 } 975 }
980 glBindTextureUnit(binding, view->GetTexture()); 976 const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
981 977 texture.tic.z_source, texture.tic.w_source);
982 if (view->GetSurfaceParams().IsBuffer()) { 978 glBindTextureUnit(binding, handle);
983 return; 979 if (!view->GetSurfaceParams().IsBuffer()) {
980 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
984 } 981 }
985 // Apply swizzle to textures that are not buffers.
986 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
987 texture.tic.w_source);
988
989 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
990} 982}
991 983
992void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { 984void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
@@ -1015,14 +1007,11 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
1015 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); 1007 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
1016 return; 1008 return;
1017 } 1009 }
1018 if (!tic.IsBuffer()) {
1019 view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1020 }
1021 if (entry.is_written) { 1010 if (entry.is_written) {
1022 view->MarkAsModified(texture_cache.Tick()); 1011 view->MarkAsModified(texture_cache.Tick());
1023 } 1012 }
1024 glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE, 1013 const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
1025 view->GetFormat()); 1014 glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
1026} 1015}
1027 1016
1028void RasterizerOpenGL::SyncViewport() { 1017void RasterizerOpenGL::SyncViewport() {
@@ -1031,6 +1020,26 @@ void RasterizerOpenGL::SyncViewport() {
1031 const auto& regs = gpu.regs; 1020 const auto& regs = gpu.regs;
1032 1021
1033 const bool dirty_viewport = flags[Dirty::Viewports]; 1022 const bool dirty_viewport = flags[Dirty::Viewports];
1023 const bool dirty_clip_control = flags[Dirty::ClipControl];
1024
1025 if (dirty_clip_control || flags[Dirty::FrontFace]) {
1026 flags[Dirty::FrontFace] = false;
1027
1028 GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
1029 if (regs.screen_y_control.triangle_rast_flip != 0 &&
1030 regs.viewport_transform[0].scale_y < 0.0f) {
1031 switch (mode) {
1032 case GL_CW:
1033 mode = GL_CCW;
1034 break;
1035 case GL_CCW:
1036 mode = GL_CW;
1037 break;
1038 }
1039 }
1040 glFrontFace(mode);
1041 }
1042
1034 if (dirty_viewport || flags[Dirty::ClipControl]) { 1043 if (dirty_viewport || flags[Dirty::ClipControl]) {
1035 flags[Dirty::ClipControl] = false; 1044 flags[Dirty::ClipControl] = false;
1036 1045
@@ -1128,11 +1137,6 @@ void RasterizerOpenGL::SyncCullMode() {
1128 glDisable(GL_CULL_FACE); 1137 glDisable(GL_CULL_FACE);
1129 } 1138 }
1130 } 1139 }
1131
1132 if (flags[Dirty::FrontFace]) {
1133 flags[Dirty::FrontFace] = false;
1134 glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
1135 }
1136} 1140}
1137 1141
1138void RasterizerOpenGL::SyncPrimitiveRestart() { 1142void RasterizerOpenGL::SyncPrimitiveRestart() {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 253484968..9cb115959 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2344,7 +2344,12 @@ private:
2344 return {}; 2344 return {};
2345 } 2345 }
2346 2346
2347 Expression MemoryBarrierGL(Operation) { 2347 Expression MemoryBarrierGroup(Operation) {
2348 code.AddLine("groupMemoryBarrier();");
2349 return {};
2350 }
2351
2352 Expression MemoryBarrierGlobal(Operation) {
2348 code.AddLine("memoryBarrier();"); 2353 code.AddLine("memoryBarrier();");
2349 return {}; 2354 return {};
2350 } 2355 }
@@ -2591,7 +2596,8 @@ private:
2591 &GLSLDecompiler::ShuffleIndexed, 2596 &GLSLDecompiler::ShuffleIndexed,
2592 2597
2593 &GLSLDecompiler::Barrier, 2598 &GLSLDecompiler::Barrier,
2594 &GLSLDecompiler::MemoryBarrierGL, 2599 &GLSLDecompiler::MemoryBarrierGroup,
2600 &GLSLDecompiler::MemoryBarrierGlobal,
2595 }; 2601 };
2596 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2602 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2597 2603
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 96605db84..8e754fa90 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -47,6 +47,10 @@ void ProgramManager::BindHostPipeline(GLuint pipeline) {
47 old_state.geometry = 0; 47 old_state.geometry = 0;
48 glDisable(GL_GEOMETRY_PROGRAM_NV); 48 glDisable(GL_GEOMETRY_PROGRAM_NV);
49 } 49 }
50 } else {
51 if (!is_graphics_bound) {
52 glUseProgram(0);
53 }
50 } 54 }
51 glBindProgramPipeline(pipeline); 55 glBindProgramPipeline(pipeline);
52} 56}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 94fbd2a22..4faa8b90c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -35,7 +35,7 @@ MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
35namespace { 35namespace {
36 36
37struct FormatTuple { 37struct FormatTuple {
38 GLint internal_format; 38 GLenum internal_format;
39 GLenum format = GL_NONE; 39 GLenum format = GL_NONE;
40 GLenum type = GL_NONE; 40 GLenum type = GL_NONE;
41}; 41};
@@ -238,6 +238,12 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
238 return texture; 238 return texture;
239} 239}
240 240
241constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
242 SwizzleSource w_source) {
243 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
244 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
245}
246
241} // Anonymous namespace 247} // Anonymous namespace
242 248
243CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, 249CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
@@ -381,7 +387,7 @@ void CachedSurface::DecorateSurfaceName() {
381} 387}
382 388
383void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { 389void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
384 LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix); 390 LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
385} 391}
386 392
387View CachedSurface::CreateView(const ViewParams& view_key) { 393View CachedSurface::CreateView(const ViewParams& view_key) {
@@ -397,14 +403,13 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
397} 403}
398 404
399CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, 405CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
400 const bool is_proxy) 406 bool is_proxy)
401 : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { 407 : VideoCommon::ViewBase(params), surface{surface},
402 target = GetTextureTarget(params.target); 408 format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
403 format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format; 409 target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
404 if (!is_proxy) { 410 if (!is_proxy) {
405 texture_view = CreateTextureView(); 411 main_view = CreateTextureView();
406 } 412 }
407 swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
408} 413}
409 414
410CachedSurfaceView::~CachedSurfaceView() = default; 415CachedSurfaceView::~CachedSurfaceView() = default;
@@ -447,27 +452,49 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
447 } 452 }
448} 453}
449 454
450void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, 455GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
451 SwizzleSource z_source, SwizzleSource w_source) { 456 SwizzleSource z_source, SwizzleSource w_source) {
452 u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); 457 if (GetSurfaceParams().IsBuffer()) {
453 if (new_swizzle == swizzle) 458 return GetTexture();
454 return; 459 }
455 swizzle = new_swizzle; 460 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
456 const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), 461 if (current_swizzle == new_swizzle) {
457 GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; 462 return current_view;
458 const GLuint handle = GetTexture(); 463 }
459 const PixelFormat format = surface.GetSurfaceParams().pixel_format; 464 current_swizzle = new_swizzle;
460 switch (format) { 465
466 const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
467 OGLTextureView& view = entry->second;
468 if (!is_cache_miss) {
469 current_view = view.handle;
470 return view.handle;
471 }
472 view = CreateTextureView();
473 current_view = view.handle;
474
475 std::array swizzle{x_source, y_source, z_source, w_source};
476
477 switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
461 case PixelFormat::Z24S8: 478 case PixelFormat::Z24S8:
462 case PixelFormat::Z32FS8: 479 case PixelFormat::Z32FS8:
463 case PixelFormat::S8Z24: 480 case PixelFormat::S8Z24:
464 glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, 481 UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
482 glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
465 GetComponent(format, x_source == SwizzleSource::R)); 483 GetComponent(format, x_source == SwizzleSource::R));
484
485 // Make sure we sample the first component
486 std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
487 return value == SwizzleSource::G ? SwizzleSource::R : value;
488 });
489 [[fallthrough]];
490 default: {
491 const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
492 GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
493 glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
466 break; 494 break;
467 default:
468 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
469 break;
470 } 495 }
496 }
497 return view.handle;
471} 498}
472 499
473OGLTextureView CachedSurfaceView::CreateTextureView() const { 500OGLTextureView CachedSurfaceView::CreateTextureView() const {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 02d9981a1..8a2ac8603 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -83,7 +83,7 @@ public:
83 /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER 83 /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
84 void Attach(GLenum attachment, GLenum target) const; 84 void Attach(GLenum attachment, GLenum target) const;
85 85
86 void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, 86 GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
87 Tegra::Texture::SwizzleSource y_source, 87 Tegra::Texture::SwizzleSource y_source,
88 Tegra::Texture::SwizzleSource z_source, 88 Tegra::Texture::SwizzleSource z_source,
89 Tegra::Texture::SwizzleSource w_source); 89 Tegra::Texture::SwizzleSource w_source);
@@ -98,7 +98,7 @@ public:
98 if (is_proxy) { 98 if (is_proxy) {
99 return surface.GetTexture(); 99 return surface.GetTexture();
100 } 100 }
101 return texture_view.handle; 101 return main_view.handle;
102 } 102 }
103 103
104 GLenum GetFormat() const { 104 GLenum GetFormat() const {
@@ -110,23 +110,19 @@ public:
110 } 110 }
111 111
112private: 112private:
113 u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
114 Tegra::Texture::SwizzleSource y_source,
115 Tegra::Texture::SwizzleSource z_source,
116 Tegra::Texture::SwizzleSource w_source) const {
117 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
118 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
119 }
120
121 OGLTextureView CreateTextureView() const; 113 OGLTextureView CreateTextureView() const;
122 114
123 CachedSurface& surface; 115 CachedSurface& surface;
124 GLenum target{}; 116 const GLenum format;
125 GLenum format{}; 117 const GLenum target;
118 const bool is_proxy;
119
120 std::unordered_map<u32, OGLTextureView> view_cache;
121 OGLTextureView main_view;
126 122
127 OGLTextureView texture_view; 123 // Use an invalid default so it always fails the comparison test
128 u32 swizzle{}; 124 u32 current_swizzle = 0xffffffff;
129 bool is_proxy{}; 125 GLuint current_view = 0;
130}; 126};
131 127
132class TextureCacheOpenGL final : public TextureCacheBase { 128class TextureCacheOpenGL final : public TextureCacheBase {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6b489e6db..e7952924a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() {
753bool RendererOpenGL::Init() { 753bool RendererOpenGL::Init() {
754 if (GLAD_GL_KHR_debug) { 754 if (GLAD_GL_KHR_debug) {
755 glEnable(GL_DEBUG_OUTPUT); 755 glEnable(GL_DEBUG_OUTPUT);
756 if (Settings::values.renderer_debug) {
757 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
758 }
756 glDebugMessageCallback(DebugHandler, nullptr); 759 glDebugMessageCallback(DebugHandler, nullptr);
757 } 760 }
758 761
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 568744e3c..424278816 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
71 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); 71 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
72 72
73 u32 packed_front_face = PackFrontFace(regs.front_face); 73 u32 packed_front_face = PackFrontFace(regs.front_face);
74 if (regs.screen_y_control.triangle_rast_flip != 0 && 74 if (regs.screen_y_control.triangle_rast_flip != 0) {
75 regs.viewport_transform[0].scale_y > 0.0f) {
76 // Flip front face 75 // Flip front face
77 packed_front_face = 1 - packed_front_face; 76 packed_front_face = 1 - packed_front_face;
78 } 77 }
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 12be691a5..2871035f5 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -142,7 +142,7 @@ struct FormatTuple {
142 {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 142 {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16
143 {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 143 {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16
144 {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 144 {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4
145 {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 145 {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8
146 {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F 146 {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F
147 {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F 147 {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F
148 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F 148 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F
@@ -168,7 +168,7 @@ struct FormatTuple {
168 {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 168 {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8
169 {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 169 {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5
170 {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 170 {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4
171 {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB 171 {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB
172 {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB 172 {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB
173 {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB 173 {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB
174 {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB 174 {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index f0c491d00..750e5a0ca 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -104,6 +104,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
104 VK_FORMAT_R16_SFLOAT, 104 VK_FORMAT_R16_SFLOAT,
105 VK_FORMAT_R16G16B16A16_SFLOAT, 105 VK_FORMAT_R16G16B16A16_SFLOAT,
106 VK_FORMAT_B8G8R8A8_UNORM, 106 VK_FORMAT_B8G8R8A8_UNORM,
107 VK_FORMAT_B8G8R8A8_SRGB,
107 VK_FORMAT_R4G4B4A4_UNORM_PACK16, 108 VK_FORMAT_R4G4B4A4_UNORM_PACK16,
108 VK_FORMAT_D32_SFLOAT, 109 VK_FORMAT_D32_SFLOAT,
109 VK_FORMAT_D16_UNORM, 110 VK_FORMAT_D16_UNORM,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a5c7b7945..65a1c6245 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
312 ASSERT(point_size != 0.0f); 312 ASSERT(point_size != 0.0f);
313 } 313 }
314 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { 314 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
315 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); 315 const auto& attribute = fixed_state.vertex_input.attributes[i];
316 specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
317 specialization.attribute_types[i] = attribute.Type();
316 } 318 }
317 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 319 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
318 320
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index be5b77fae..a3d992ed3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
877 877
878 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { 878 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
879 const auto& attrib = regs.vertex_attrib_format[index]; 879 const auto& attrib = regs.vertex_attrib_format[index];
880 if (!attrib.IsValid()) { 880 if (attrib.IsConstant()) {
881 vertex_input.SetAttribute(index, false, 0, 0, {}, {}); 881 vertex_input.SetAttribute(index, false, 0, 0, {}, {});
882 continue; 882 continue;
883 } 883 }
884
885 [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
886 ASSERT(buffer.IsEnabled());
887
888 vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), 884 vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
889 attrib.size.Value()); 885 attrib.size.Value());
890 } 886 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 890f34a2c..a13e8baa7 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -741,8 +741,10 @@ private:
741 if (!IsGenericAttribute(index)) { 741 if (!IsGenericAttribute(index)) {
742 continue; 742 continue;
743 } 743 }
744
745 const u32 location = GetGenericAttributeLocation(index); 744 const u32 location = GetGenericAttributeLocation(index);
745 if (!IsAttributeEnabled(location)) {
746 continue;
747 }
746 const auto type_descriptor = GetAttributeType(location); 748 const auto type_descriptor = GetAttributeType(location);
747 Id type; 749 Id type;
748 if (IsInputAttributeArray()) { 750 if (IsInputAttributeArray()) {
@@ -986,6 +988,10 @@ private:
986 return stage == ShaderType::TesselationControl; 988 return stage == ShaderType::TesselationControl;
987 } 989 }
988 990
991 bool IsAttributeEnabled(u32 location) const {
992 return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
993 }
994
989 u32 GetNumInputVertices() const { 995 u32 GetNumInputVertices() const {
990 switch (stage) { 996 switch (stage) {
991 case ShaderType::Geometry: 997 case ShaderType::Geometry:
@@ -1201,16 +1207,20 @@ private:
1201 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); 1207 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
1202 return {v_float_zero, Type::Float}; 1208 return {v_float_zero, Type::Float};
1203 default: 1209 default:
1204 if (IsGenericAttribute(attribute)) { 1210 if (!IsGenericAttribute(attribute)) {
1205 const u32 location = GetGenericAttributeLocation(attribute); 1211 break;
1206 const auto type_descriptor = GetAttributeType(location);
1207 const Type type = type_descriptor.type;
1208 const Id attribute_id = input_attributes.at(attribute);
1209 const std::vector elements = {element};
1210 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1211 return {OpLoad(GetTypeDefinition(type), pointer), type};
1212 } 1212 }
1213 break; 1213 const u32 location = GetGenericAttributeLocation(attribute);
1214 if (!IsAttributeEnabled(location)) {
1215 // Disabled attributes (also known as constant attributes) always return zero.
1216 return {v_float_zero, Type::Float};
1217 }
1218 const auto type_descriptor = GetAttributeType(location);
1219 const Type type = type_descriptor.type;
1220 const Id attribute_id = input_attributes.at(attribute);
1221 const std::vector elements = {element};
1222 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1223 return {OpLoad(GetTypeDefinition(type), pointer), type};
1214 } 1224 }
1215 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); 1225 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
1216 return {v_float_zero, Type::Float}; 1226 return {v_float_zero, Type::Float};
@@ -2215,8 +2225,8 @@ private:
2215 return {}; 2225 return {};
2216 } 2226 }
2217 2227
2218 Expression MemoryBarrierGL(Operation) { 2228 template <spv::Scope scope>
2219 const auto scope = spv::Scope::Device; 2229 Expression MemoryBarrier(Operation) {
2220 const auto semantics = 2230 const auto semantics =
2221 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | 2231 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
2222 spv::MemorySemanticsMask::WorkgroupMemory | 2232 spv::MemorySemanticsMask::WorkgroupMemory |
@@ -2681,7 +2691,8 @@ private:
2681 &SPIRVDecompiler::ShuffleIndexed, 2691 &SPIRVDecompiler::ShuffleIndexed,
2682 2692
2683 &SPIRVDecompiler::Barrier, 2693 &SPIRVDecompiler::Barrier,
2684 &SPIRVDecompiler::MemoryBarrierGL, 2694 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
2695 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
2685 }; 2696 };
2686 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2697 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2687 2698
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f4c05ac3c..b7af26388 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -88,7 +88,8 @@ struct Specialization final {
88 u32 shared_memory_size{}; 88 u32 shared_memory_size{};
89 89
90 // Graphics specific 90 // Graphics specific
91 std::optional<float> point_size{}; 91 std::optional<float> point_size;
92 std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
92 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 93 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
93 bool ndc_minus_one_to_one{}; 94 bool ndc_minus_one_to_one{};
94}; 95};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 55f43e61b..2f1d5021d 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -354,26 +354,23 @@ CachedSurfaceView::~CachedSurfaceView() = default;
354 354
355VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, 355VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
356 SwizzleSource z_source, SwizzleSource w_source) { 356 SwizzleSource z_source, SwizzleSource w_source) {
357 const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); 357 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
358 if (last_image_view && last_swizzle == swizzle) { 358 if (last_image_view && last_swizzle == new_swizzle) {
359 return last_image_view; 359 return last_image_view;
360 } 360 }
361 last_swizzle = swizzle; 361 last_swizzle = new_swizzle;
362 362
363 const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle); 363 const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
364 auto& image_view = entry->second; 364 auto& image_view = entry->second;
365 if (!is_cache_miss) { 365 if (!is_cache_miss) {
366 return last_image_view = *image_view; 366 return last_image_view = *image_view;
367 } 367 }
368 368
369 auto swizzle_x = MaxwellToVK::SwizzleSource(x_source); 369 std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
370 auto swizzle_y = MaxwellToVK::SwizzleSource(y_source); 370 MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
371 auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
372 auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
373
374 if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { 371 if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
375 // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. 372 // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
376 std::swap(swizzle_x, swizzle_z); 373 std::swap(swizzle[0], swizzle[2]);
377 } 374 }
378 375
379 // Games can sample depth or stencil values on textures. This is decided by the swizzle value on 376 // Games can sample depth or stencil values on textures. This is decided by the swizzle value on
@@ -395,11 +392,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
395 UNIMPLEMENTED(); 392 UNIMPLEMENTED();
396 } 393 }
397 394
398 // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity 395 // Make sure we sample the first component
399 swizzle_x = VK_COMPONENT_SWIZZLE_R; 396 std::transform(
400 swizzle_y = VK_COMPONENT_SWIZZLE_G; 397 swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) {
401 swizzle_z = VK_COMPONENT_SWIZZLE_B; 398 return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component;
402 swizzle_w = VK_COMPONENT_SWIZZLE_A; 399 });
403 } 400 }
404 401
405 VkImageViewCreateInfo ci; 402 VkImageViewCreateInfo ci;
@@ -409,7 +406,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
409 ci.image = surface.GetImageHandle(); 406 ci.image = surface.GetImageHandle();
410 ci.viewType = image_view_type; 407 ci.viewType = image_view_type;
411 ci.format = surface.GetImage().GetFormat(); 408 ci.format = surface.GetImage().GetFormat();
412 ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; 409 ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]};
413 ci.subresourceRange.aspectMask = aspect; 410 ci.subresourceRange.aspectMask = aspect;
414 ci.subresourceRange.baseMipLevel = base_level; 411 ci.subresourceRange.baseMipLevel = base_level;
415 ci.subresourceRange.levelCount = num_levels; 412 ci.subresourceRange.levelCount = num_levels;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 694b325e1..d00e10913 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -299,9 +299,19 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
299 break; 299 break;
300 } 300 }
301 case OpCode::Id::MEMBAR: { 301 case OpCode::Id::MEMBAR: {
302 UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
303 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); 302 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
304 bb.push_back(Operation(OperationCode::MemoryBarrierGL)); 303 const OperationCode type = [instr] {
304 switch (instr.membar.type) {
305 case Tegra::Shader::MembarType::CTA:
306 return OperationCode::MemoryBarrierGroup;
307 case Tegra::Shader::MembarType::GL:
308 return OperationCode::MemoryBarrierGlobal;
309 default:
310 UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
311 return OperationCode::MemoryBarrierGlobal;
312 }
313 }();
314 bb.push_back(Operation(type));
305 break; 315 break;
306 } 316 }
307 case OpCode::Id::DEPBAR: { 317 case OpCode::Id::DEPBAR: {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c06512413..c5e5165ff 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -233,8 +233,9 @@ enum class OperationCode {
233 ThreadLtMask, /// () -> uint 233 ThreadLtMask, /// () -> uint
234 ShuffleIndexed, /// (uint value, uint index) -> uint 234 ShuffleIndexed, /// (uint value, uint index) -> uint
235 235
236 Barrier, /// () -> void 236 Barrier, /// () -> void
237 MemoryBarrierGL, /// () -> void 237 MemoryBarrierGroup, /// () -> void
238 MemoryBarrierGlobal, /// () -> void
238 239
239 Amount, 240 Amount,
240}; 241};
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7032e0059..f476f03b0 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
41 ComponentType alpha_component; 41 ComponentType alpha_component;
42 bool is_srgb; 42 bool is_srgb;
43}; 43};
44constexpr std::array<Table, 77> DefinitionTable = {{ 44constexpr std::array<Table, 78> DefinitionTable = {{
45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, 45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, 46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, 47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{
98 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, 98 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
99 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, 99 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
100 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, 100 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
101 {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
101 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, 102 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
102 103
103 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, 104 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d6efc34b2..45e3ddd2c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
14#include <unordered_map> 14#include <unordered_map>
15#include <vector> 15#include <vector>
16 16
17#include <boost/container/small_vector.hpp>
17#include <boost/icl/interval_map.hpp> 18#include <boost/icl/interval_map.hpp>
18#include <boost/range/iterator_range.hpp> 19#include <boost/range/iterator_range.hpp>
19 20
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
53 54
54template <typename TSurface, typename TView> 55template <typename TSurface, typename TView>
55class TextureCache { 56class TextureCache {
57 using VectorSurface = boost::container::small_vector<TSurface, 1>;
56 58
57public: 59public:
58 void InvalidateRegion(VAddr addr, std::size_t size) { 60 void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -308,18 +310,20 @@ public:
308 dst_surface.first->MarkAsModified(true, Tick()); 310 dst_surface.first->MarkAsModified(true, Tick());
309 } 311 }
310 312
311 TSurface TryFindFramebufferSurface(VAddr addr) { 313 TSurface TryFindFramebufferSurface(VAddr addr) const {
312 if (!addr) { 314 if (!addr) {
313 return nullptr; 315 return nullptr;
314 } 316 }
315 const VAddr page = addr >> registry_page_bits; 317 const VAddr page = addr >> registry_page_bits;
316 std::vector<TSurface>& list = registry[page]; 318 const auto it = registry.find(page);
317 for (auto& surface : list) { 319 if (it == registry.end()) {
318 if (surface->GetCpuAddr() == addr) { 320 return nullptr;
319 return surface;
320 }
321 } 321 }
322 return nullptr; 322 const auto& list = it->second;
323 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
324 return surface->GetCpuAddr() == addr;
325 });
326 return found != list.end() ? *found : nullptr;
323 } 327 }
324 328
325 u64 Tick() { 329 u64 Tick() {
@@ -498,7 +502,7 @@ private:
498 * @param untopological Indicates to the recycler that the texture has no way 502 * @param untopological Indicates to the recycler that the texture has no way
499 * to match the overlaps due to topological reasons. 503 * to match the overlaps due to topological reasons.
500 **/ 504 **/
501 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, 505 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
502 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 506 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
503 if (Settings::IsGPULevelExtreme()) { 507 if (Settings::IsGPULevelExtreme()) {
504 return RecycleStrategy::Flush; 508 return RecycleStrategy::Flush;
@@ -538,9 +542,8 @@ private:
538 * @param untopological Indicates to the recycler that the texture has no way to match the 542 * @param untopological Indicates to the recycler that the texture has no way to match the
539 * overlaps due to topological reasons. 543 * overlaps due to topological reasons.
540 **/ 544 **/
541 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 545 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
542 const SurfaceParams& params, const GPUVAddr gpu_addr, 546 const GPUVAddr gpu_addr, const bool preserve_contents,
543 const bool preserve_contents,
544 const MatchTopologyResult untopological) { 547 const MatchTopologyResult untopological) {
545 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); 548 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
546 for (auto& surface : overlaps) { 549 for (auto& surface : overlaps) {
@@ -650,7 +653,7 @@ private:
650 * @param params The parameters on the new surface. 653 * @param params The parameters on the new surface.
651 * @param gpu_addr The starting address of the new surface. 654 * @param gpu_addr The starting address of the new surface.
652 **/ 655 **/
653 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, 656 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
654 const SurfaceParams& params, 657 const SurfaceParams& params,
655 const GPUVAddr gpu_addr) { 658 const GPUVAddr gpu_addr) {
656 if (params.target == SurfaceTarget::Texture3D) { 659 if (params.target == SurfaceTarget::Texture3D) {
@@ -708,7 +711,7 @@ private:
708 * @param preserve_contents Indicates that the new surface should be loaded from memory or 711 * @param preserve_contents Indicates that the new surface should be loaded from memory or
709 * left blank. 712 * left blank.
710 */ 713 */
711 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 714 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
712 const SurfaceParams& params, 715 const SurfaceParams& params,
713 const GPUVAddr gpu_addr, 716 const GPUVAddr gpu_addr,
714 const VAddr cpu_addr, 717 const VAddr cpu_addr,
@@ -810,7 +813,7 @@ private:
810 TSurface& current_surface = iter->second; 813 TSurface& current_surface = iter->second;
811 const auto topological_result = current_surface->MatchesTopology(params); 814 const auto topological_result = current_surface->MatchesTopology(params);
812 if (topological_result != MatchTopologyResult::FullMatch) { 815 if (topological_result != MatchTopologyResult::FullMatch) {
813 std::vector<TSurface> overlaps{current_surface}; 816 VectorSurface overlaps{current_surface};
814 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 817 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
815 topological_result); 818 topological_result);
816 } 819 }
@@ -991,7 +994,9 @@ private:
991 params.target = target; 994 params.target = target;
992 params.is_tiled = false; 995 params.is_tiled = false;
993 params.srgb_conversion = false; 996 params.srgb_conversion = false;
994 params.is_layered = false; 997 params.is_layered =
998 target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
999 target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
995 params.block_width = 0; 1000 params.block_width = 0;
996 params.block_height = 0; 1001 params.block_height = 0;
997 params.block_depth = 0; 1002 params.block_depth = 0;
@@ -1124,23 +1129,25 @@ private:
1124 } 1129 }
1125 } 1130 }
1126 1131
1127 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { 1132 VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1128 if (size == 0) { 1133 if (size == 0) {
1129 return {}; 1134 return {};
1130 } 1135 }
1131 const VAddr cpu_addr_end = cpu_addr + size; 1136 const VAddr cpu_addr_end = cpu_addr + size;
1132 VAddr start = cpu_addr >> registry_page_bits;
1133 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 1137 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1134 std::vector<TSurface> surfaces; 1138 VectorSurface surfaces;
1135 while (start <= end) { 1139 for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1136 std::vector<TSurface>& list = registry[start]; 1140 const auto it = registry.find(start);
1137 for (auto& surface : list) { 1141 if (it == registry.end()) {
1138 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { 1142 continue;
1139 surface->MarkAsPicked(true); 1143 }
1140 surfaces.push_back(surface); 1144 for (auto& surface : it->second) {
1145 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
1146 continue;
1141 } 1147 }
1148 surface->MarkAsPicked(true);
1149 surfaces.push_back(surface);
1142 } 1150 }
1143 start++;
1144 } 1151 }
1145 for (auto& surface : surfaces) { 1152 for (auto& surface : surfaces) {
1146 surface->MarkAsPicked(false); 1153 surface->MarkAsPicked(false);
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 1adf8932b..1f5e43043 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -106,6 +106,9 @@ public:
106 format.setVersion(4, 3); 106 format.setVersion(4, 3);
107 format.setProfile(QSurfaceFormat::CompatibilityProfile); 107 format.setProfile(QSurfaceFormat::CompatibilityProfile);
108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); 108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
109 if (Settings::values.renderer_debug) {
110 format.setOption(QSurfaceFormat::FormatOption::DebugContext);
111 }
109 // TODO: expose a setting for buffer value (ie default/single/double/triple) 112 // TODO: expose a setting for buffer value (ie default/single/double/triple)
110 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); 113 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
111 format.setSwapInterval(0); 114 format.setSwapInterval(0);
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 0b291c7d0..270cccc77 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -65,6 +65,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
65#include "common/logging/backend.h" 65#include "common/logging/backend.h"
66#include "common/logging/filter.h" 66#include "common/logging/filter.h"
67#include "common/logging/log.h" 67#include "common/logging/log.h"
68#include "common/memory_detect.h"
68#include "common/microprofile.h" 69#include "common/microprofile.h"
69#include "common/scm_rev.h" 70#include "common/scm_rev.h"
70#include "common/scope_exit.h" 71#include "common/scope_exit.h"
@@ -219,6 +220,10 @@ GMainWindow::GMainWindow()
219 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); 220 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
220#endif 221#endif
221 LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); 222 LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());
223 LOG_INFO(Frontend, "Host RAM: {:.2f} GB",
224 Common::GetMemInfo().TotalPhysicalMemory / 1024.0f / 1024 / 1024);
225 LOG_INFO(Frontend, "Host Swap: {:.2f} GB",
226 Common::GetMemInfo().TotalSwapMemory / 1024.0f / 1024 / 1024);
222 UpdateWindowTitle(); 227 UpdateWindowTitle();
223 228
224 show(); 229 show();
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 411e7e647..09cc0a3b5 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); 98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); 99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); 100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
101 if (Settings::values.renderer_debug) {
102 SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
103 }
101 SDL_GL_SetSwapInterval(0); 104 SDL_GL_SetSwapInterval(0);
102 105
103 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, 106 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,