summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rw-r--r--CMakeLists.txt4
-rw-r--r--externals/CMakeLists.txt8
m---------externals/xbyak0
-rw-r--r--src/common/CMakeLists.txt4
-rw-r--r--src/common/x64/xbyak_abi.h266
-rw-r--r--src/common/x64/xbyak_util.h47
-rw-r--r--src/core/hle/service/hid/hid.cpp13
-rw-r--r--src/core/hle/service/hid/hid.h1
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h40
-rw-r--r--src/video_core/engines/maxwell_3d.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp50
-rw-r--r--src/video_core/renderer_opengl/gl_device.h12
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp107
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp109
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp3
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp143
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp37
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h3
-rw-r--r--src/video_core/shader/decode/other.cpp16
-rw-r--r--src/video_core/shader/node.h5
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/texture_cache.h53
-rw-r--r--src/yuzu/bootmanager.cpp3
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp3
33 files changed, 739 insertions, 250 deletions
diff --git a/.gitmodules b/.gitmodules
index bf3b80d59..2ec9dda62 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -28,3 +28,6 @@
28[submodule "libzip"] 28[submodule "libzip"]
29 path = externals/libzip/libzip 29 path = externals/libzip/libzip
30 url = https://github.com/nih-at/libzip.git 30 url = https://github.com/nih-at/libzip.git
31[submodule "xbyak"]
32 path = externals/xbyak
33 url = https://github.com/herumi/xbyak.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 61321bf0a..a9f669d56 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
1cmake_minimum_required(VERSION 3.11) 1cmake_minimum_required(VERSION 3.15)
2 2
3list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") 3list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
4list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") 4list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
@@ -13,7 +13,7 @@ project(yuzu)
13option(ENABLE_SDL2 "Enable the SDL2 frontend" ON) 13option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
14 14
15option(ENABLE_QT "Enable the Qt frontend" ON) 15option(ENABLE_QT "Enable the Qt frontend" ON)
16CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF) 16CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF)
17 17
18option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON) 18option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
19 19
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 0b40cd1b0..df7a5e0a9 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -75,3 +75,11 @@ if (ENABLE_WEB_SERVICE)
75 target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) 75 target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
76 target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto) 76 target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
77endif() 77endif()
78
79if (NOT TARGET xbyak)
80 if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
81 add_library(xbyak INTERFACE)
82 target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
83 target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
84 endif()
85endif()
diff --git a/externals/xbyak b/externals/xbyak
new file mode 160000
Subproject 82b70e665918efc2ee348091742fd0237b3b68c
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 264dff546..24b7a083c 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -171,10 +171,12 @@ if(ARCHITECTURE_x86_64)
171 PRIVATE 171 PRIVATE
172 x64/cpu_detect.cpp 172 x64/cpu_detect.cpp
173 x64/cpu_detect.h 173 x64/cpu_detect.h
174 x64/xbyak_abi.h
175 x64/xbyak_util.h
174 ) 176 )
175endif() 177endif()
176 178
177create_target_directory_groups(common) 179create_target_directory_groups(common)
178 180
179target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) 181target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile)
180target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd) 182target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..794da8a52
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,266 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8#include <initializer_list>
9#include <xbyak.h>
10#include "common/assert.h"
11
12namespace Common::X64 {
13
14inline int RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers.");
18 ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20}
21
22inline Xbyak::Reg64 IndexToReg64(int reg_index) {
23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(reg_index);
25}
26
27inline Xbyak::Xmm IndexToXmm(int reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(reg_index - 16);
30}
31
32inline Xbyak::Reg IndexToReg(int reg_index) {
33 if (reg_index < 16) {
34 return IndexToReg64(reg_index);
35 } else {
36 return IndexToXmm(reg_index);
37 }
38}
39
40inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
41 std::bitset<32> bits;
42 for (const Xbyak::Reg& reg : regs) {
43 bits[RegToIndex(reg)] = true;
44 }
45 return bits;
46}
47
/// Mask covering the lower sixteen slots of a register set (bits 0-15).
const std::bitset<32> ABI_ALL_GPRS{0x0000FFFF};
/// Mask covering the upper sixteen slots of a register set (bits 16-31).
const std::bitset<32> ABI_ALL_XMMS{0xFFFF0000};
50
51#ifdef _WIN32
52
53// Microsoft x64 ABI
54const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
55const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
56const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
57const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
58const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
59
60const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
61 // GPRs
62 Xbyak::util::rcx,
63 Xbyak::util::rdx,
64 Xbyak::util::r8,
65 Xbyak::util::r9,
66 Xbyak::util::r10,
67 Xbyak::util::r11,
68 // XMMs
69 Xbyak::util::xmm0,
70 Xbyak::util::xmm1,
71 Xbyak::util::xmm2,
72 Xbyak::util::xmm3,
73 Xbyak::util::xmm4,
74 Xbyak::util::xmm5,
75});
76
77const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
78 // GPRs
79 Xbyak::util::rbx,
80 Xbyak::util::rsi,
81 Xbyak::util::rdi,
82 Xbyak::util::rbp,
83 Xbyak::util::r12,
84 Xbyak::util::r13,
85 Xbyak::util::r14,
86 Xbyak::util::r15,
87 // XMMs
88 Xbyak::util::xmm6,
89 Xbyak::util::xmm7,
90 Xbyak::util::xmm8,
91 Xbyak::util::xmm9,
92 Xbyak::util::xmm10,
93 Xbyak::util::xmm11,
94 Xbyak::util::xmm12,
95 Xbyak::util::xmm13,
96 Xbyak::util::xmm14,
97 Xbyak::util::xmm15,
98});
99
100constexpr size_t ABI_SHADOW_SPACE = 0x20;
101
102#else
103
104// System V x86-64 ABI
105const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
106const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
107const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
108const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
109const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
110
111const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
112 // GPRs
113 Xbyak::util::rcx,
114 Xbyak::util::rdx,
115 Xbyak::util::rdi,
116 Xbyak::util::rsi,
117 Xbyak::util::r8,
118 Xbyak::util::r9,
119 Xbyak::util::r10,
120 Xbyak::util::r11,
121 // XMMs
122 Xbyak::util::xmm0,
123 Xbyak::util::xmm1,
124 Xbyak::util::xmm2,
125 Xbyak::util::xmm3,
126 Xbyak::util::xmm4,
127 Xbyak::util::xmm5,
128 Xbyak::util::xmm6,
129 Xbyak::util::xmm7,
130 Xbyak::util::xmm8,
131 Xbyak::util::xmm9,
132 Xbyak::util::xmm10,
133 Xbyak::util::xmm11,
134 Xbyak::util::xmm12,
135 Xbyak::util::xmm13,
136 Xbyak::util::xmm14,
137 Xbyak::util::xmm15,
138});
139
140const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
141 // GPRs
142 Xbyak::util::rbx,
143 Xbyak::util::rbp,
144 Xbyak::util::r12,
145 Xbyak::util::r13,
146 Xbyak::util::r14,
147 Xbyak::util::r15,
148});
149
150constexpr size_t ABI_SHADOW_SPACE = 0;
151
152#endif
153
154inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
155 size_t needed_frame_size, s32* out_subtraction,
156 s32* out_xmm_offset) {
157 const auto count = (regs & ABI_ALL_GPRS).count();
158 rsp_alignment -= count * 8;
159 size_t subtraction = 0;
160 const auto xmm_count = (regs & ABI_ALL_XMMS).count();
161 if (xmm_count) {
162 // If we have any XMMs to save, we must align the stack here.
163 subtraction = rsp_alignment & 0xF;
164 }
165 subtraction += 0x10 * xmm_count;
166 size_t xmm_base_subtraction = subtraction;
167 subtraction += needed_frame_size;
168 subtraction += ABI_SHADOW_SPACE;
169 // Final alignment.
170 rsp_alignment -= subtraction;
171 subtraction += rsp_alignment & 0xF;
172
173 *out_subtraction = (s32)subtraction;
174 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
175}
176
177inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
178 size_t rsp_alignment, size_t needed_frame_size = 0) {
179 s32 subtraction, xmm_offset;
180 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
181 for (std::size_t i = 0; i < regs.size(); ++i) {
182 if (regs[i] && ABI_ALL_GPRS[i]) {
183 code.push(IndexToReg64(static_cast<int>(i)));
184 }
185 }
186 if (subtraction != 0) {
187 code.sub(code.rsp, subtraction);
188 }
189
190 for (int i = 0; i < regs.count(); i++) {
191 if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
192 code.push(IndexToReg64(i));
193 }
194 }
195
196 for (std::size_t i = 0; i < regs.size(); ++i) {
197 if (regs[i] && ABI_ALL_XMMS[i]) {
198 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
199 xmm_offset += 0x10;
200 }
201 }
202
203 return ABI_SHADOW_SPACE;
204}
205
206inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
207 size_t rsp_alignment, size_t needed_frame_size = 0) {
208 s32 subtraction, xmm_offset;
209 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
210
211 for (std::size_t i = 0; i < regs.size(); ++i) {
212 if (regs[i] && ABI_ALL_XMMS[i]) {
213 code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
214 xmm_offset += 0x10;
215 }
216 }
217
218 if (subtraction != 0) {
219 code.add(code.rsp, subtraction);
220 }
221
222 // GPRs need to be popped in reverse order
223 for (int i = 15; i >= 0; i--) {
224 if (regs[i]) {
225 code.pop(IndexToReg64(i));
226 }
227 }
228}
229
230inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
231 size_t rsp_alignment,
232 size_t needed_frame_size = 0) {
233 s32 subtraction, xmm_offset;
234 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
235
236 for (std::size_t i = 0; i < regs.size(); ++i) {
237 if (regs[i] && ABI_ALL_GPRS[i]) {
238 code.push(IndexToReg64(static_cast<int>(i)));
239 }
240 }
241
242 if (subtraction != 0) {
243 code.sub(code.rsp, subtraction);
244 }
245
246 return ABI_SHADOW_SPACE;
247}
248
249inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
250 size_t rsp_alignment, size_t needed_frame_size = 0) {
251 s32 subtraction, xmm_offset;
252 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
253
254 if (subtraction != 0) {
255 code.add(code.rsp, subtraction);
256 }
257
258 // GPRs need to be popped in reverse order
259 for (int i = 15; i >= 0; i--) {
260 if (regs[i]) {
261 code.pop(IndexToReg64(i));
262 }
263 }
264}
265
266} // namespace Common::X64
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..df17f8cbe
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,47 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <xbyak.h>
9#include "common/x64/xbyak_abi.h"
10
11namespace Common::X64 {
12
// Comparison predicate immediates for use with cmpps/cmpss.
enum {
    CMP_EQ = 0,    // equal
    CMP_LT = 1,    // less than
    CMP_LE = 2,    // less than or equal
    CMP_UNORD = 3, // unordered
    CMP_NEQ = 4,   // not equal
    CMP_NLT = 5,   // not less than
    CMP_NLE = 6,   // not less than or equal
    CMP_ORD = 7,   // ordered
};
24
25constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
26 const u64 distance = target - (ref + 5);
27 return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
28}
29
30inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
31 return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
32}
33
34template <typename T>
35inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
36 static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
37 size_t addr = reinterpret_cast<size_t>(f);
38 if (IsWithin2G(code, addr)) {
39 code.call(f);
40 } else {
41 // ABI_RETURN is a safe temp register to use before a call
42 code.mov(ABI_RETURN, addr);
43 code.call(ABI_RETURN);
44 }
45}
46
47} // namespace Common::X64
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index c84cb1483..72a050de2 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -161,7 +161,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
161 {40, nullptr, "AcquireXpadIdEventHandle"}, 161 {40, nullptr, "AcquireXpadIdEventHandle"},
162 {41, nullptr, "ReleaseXpadIdEventHandle"}, 162 {41, nullptr, "ReleaseXpadIdEventHandle"},
163 {51, &Hid::ActivateXpad, "ActivateXpad"}, 163 {51, &Hid::ActivateXpad, "ActivateXpad"},
164 {55, nullptr, "GetXpadIds"}, 164 {55, &Hid::GetXpadIDs, "GetXpadIds"},
165 {56, nullptr, "ActivateJoyXpad"}, 165 {56, nullptr, "ActivateJoyXpad"},
166 {58, nullptr, "GetJoyXpadLifoHandle"}, 166 {58, nullptr, "GetJoyXpadLifoHandle"},
167 {59, nullptr, "GetJoyXpadIds"}, 167 {59, nullptr, "GetJoyXpadIds"},
@@ -319,6 +319,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) {
319 rb.Push(RESULT_SUCCESS); 319 rb.Push(RESULT_SUCCESS);
320} 320}
321 321
322void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) {
323 IPC::RequestParser rp{ctx};
324 const auto applet_resource_user_id{rp.Pop<u64>()};
325
326 LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id);
327
328 IPC::ResponseBuilder rb{ctx, 3};
329 rb.Push(RESULT_SUCCESS);
330 rb.Push(0);
331}
332
322void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) { 333void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) {
323 IPC::RequestParser rp{ctx}; 334 IPC::RequestParser rp{ctx};
324 const auto applet_resource_user_id{rp.Pop<u64>()}; 335 const auto applet_resource_user_id{rp.Pop<u64>()};
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index c8ed4ad8b..d481a75f8 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -86,6 +86,7 @@ public:
86private: 86private:
87 void CreateAppletResource(Kernel::HLERequestContext& ctx); 87 void CreateAppletResource(Kernel::HLERequestContext& ctx);
88 void ActivateXpad(Kernel::HLERequestContext& ctx); 88 void ActivateXpad(Kernel::HLERequestContext& ctx);
89 void GetXpadIDs(Kernel::HLERequestContext& ctx);
89 void ActivateDebugPad(Kernel::HLERequestContext& ctx); 90 void ActivateDebugPad(Kernel::HLERequestContext& ctx);
90 void ActivateTouchScreen(Kernel::HLERequestContext& ctx); 91 void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
91 void ActivateMouse(Kernel::HLERequestContext& ctx); 92 void ActivateMouse(Kernel::HLERequestContext& ctx);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f00c71dae..d6ee82836 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -229,7 +229,7 @@ endif()
229create_target_directory_groups(video_core) 229create_target_directory_groups(video_core)
230 230
231target_link_libraries(video_core PUBLIC common core) 231target_link_libraries(video_core PUBLIC common core)
232target_link_libraries(video_core PRIVATE glad) 232target_link_libraries(video_core PRIVATE glad xbyak)
233 233
234if (ENABLE_VULKAN) 234if (ENABLE_VULKAN)
235 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) 235 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d9a4a1b4d..b88fce2cd 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -56,24 +56,28 @@ public:
56 if (use_fast_cbuf || size < max_stream_size) { 56 if (use_fast_cbuf || size < max_stream_size) {
57 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { 57 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
58 auto& memory_manager = system.GPU().MemoryManager(); 58 auto& memory_manager = system.GPU().MemoryManager();
59 const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
59 if (use_fast_cbuf) { 60 if (use_fast_cbuf) {
60 if (memory_manager.IsGranularRange(gpu_addr, size)) { 61 u8* dest;
61 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 62 if (is_granular) {
62 return ConstBufferUpload(host_ptr, size); 63 dest = memory_manager.GetPointer(gpu_addr);
63 } else { 64 } else {
64 staging_buffer.resize(size); 65 staging_buffer.resize(size);
65 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); 66 dest = staging_buffer.data();
66 return ConstBufferUpload(staging_buffer.data(), size); 67 memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
67 } 68 }
69 return ConstBufferUpload(dest, size);
70 }
71 if (is_granular) {
72 u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
73 return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
74 std::memcpy(dest, host_ptr, size);
75 });
68 } else { 76 } else {
69 if (memory_manager.IsGranularRange(gpu_addr, size)) { 77 return StreamBufferUpload(
70 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 78 size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
71 return StreamBufferUpload(host_ptr, size, alignment); 79 memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
72 } else { 80 });
73 staging_buffer.resize(size);
74 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
75 return StreamBufferUpload(staging_buffer.data(), size, alignment);
76 }
77 } 81 }
78 } 82 }
79 } 83 }
@@ -101,7 +105,9 @@ public:
101 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, 105 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
102 std::size_t alignment = 4) { 106 std::size_t alignment = 4) {
103 std::lock_guard lock{mutex}; 107 std::lock_guard lock{mutex};
104 return StreamBufferUpload(raw_pointer, size, alignment); 108 return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
109 std::memcpy(dest, raw_pointer, size);
110 });
105 } 111 }
106 112
107 void Map(std::size_t max_size) { 113 void Map(std::size_t max_size) {
@@ -424,11 +430,11 @@ private:
424 map->MarkAsModified(false, 0); 430 map->MarkAsModified(false, 0);
425 } 431 }
426 432
427 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, 433 template <typename Callable>
428 std::size_t alignment) { 434 BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
429 AlignBuffer(alignment); 435 AlignBuffer(alignment);
430 const std::size_t uploaded_offset = buffer_offset; 436 const std::size_t uploaded_offset = buffer_offset;
431 std::memcpy(buffer_ptr, raw_pointer, size); 437 callable(buffer_ptr);
432 438
433 buffer_ptr += size; 439 buffer_ptr += size;
434 buffer_offset += size; 440 buffer_offset += size;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 024c9e43b..13ef2e42d 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -106,7 +106,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
106 regs.rasterize_enable = 1; 106 regs.rasterize_enable = 1;
107 regs.rt_separate_frag_data = 1; 107 regs.rt_separate_frag_data = 1;
108 regs.framebuffer_srgb = 1; 108 regs.framebuffer_srgb = 1;
109 regs.line_width_aliased = 1.0f;
110 regs.line_width_smooth = 1.0f;
109 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; 111 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
112 regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
113 regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
110 114
111 shadow_state = regs; 115 shadow_state = regs;
112 116
@@ -457,8 +461,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
457 461
458void Maxwell3D::ProcessQueryGet() { 462void Maxwell3D::ProcessQueryGet() {
459 // TODO(Subv): Support the other query units. 463 // TODO(Subv): Support the other query units.
460 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 464 if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
461 "Units other than CROP are unimplemented"); 465 LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
466 }
462 467
463 switch (regs.query.query_get.operation) { 468 switch (regs.query.query_get.operation) {
464 case Regs::QueryOperation::Release: 469 case Regs::QueryOperation::Release:
@@ -534,8 +539,8 @@ void Maxwell3D::ProcessCounterReset() {
534 rasterizer.ResetCounter(QueryType::SamplesPassed); 539 rasterizer.ResetCounter(QueryType::SamplesPassed);
535 break; 540 break;
536 default: 541 default:
537 LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", 542 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
538 static_cast<int>(regs.counter_reset)); 543 static_cast<int>(regs.counter_reset));
539 break; 544 break;
540 } 545 }
541} 546}
@@ -592,8 +597,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
592 system.GPU().GetTicks()); 597 system.GPU().GetTicks());
593 return {}; 598 return {};
594 default: 599 default:
595 UNIMPLEMENTED_MSG("Unimplemented query select type {}", 600 LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
596 static_cast<u32>(regs.query.query_get.select.Value())); 601 static_cast<u32>(regs.query.query_get.select.Value()));
597 return 1; 602 return 1;
598 } 603 }
599} 604}
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index ff35d362d..b772c37d9 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -6,6 +6,7 @@
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <cstring> 8#include <cstring>
9#include <limits>
9#include <optional> 10#include <optional>
10#include <vector> 11#include <vector>
11 12
@@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1;
26 27
27constexpr u32 NumStages = 5; 28constexpr u32 NumStages = 5;
28 29
29constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, 30constexpr std::array LimitUBOs = {
30 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, 31 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
31 GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; 32 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
33 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
32 34
33constexpr std::array LimitSSBOs = { 35constexpr std::array LimitSSBOs = {
34 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, 36 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
35 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, 37 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
36 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; 38 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
37 39
38constexpr std::array LimitSamplers = { 40constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
39 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, 41 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
40 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, 42 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
41 GL_MAX_TEXTURE_IMAGE_UNITS}; 43 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
44 GL_MAX_TEXTURE_IMAGE_UNITS,
45 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
42 46
43constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, 47constexpr std::array LimitImages = {
44 GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, 48 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
45 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, 49 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
46 GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; 50 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
47 51
48template <typename T> 52template <typename T>
49T GetInteger(GLenum pname) { 53T GetInteger(GLenum pname) {
@@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
85 return std::exchange(base, base + amount); 89 return std::exchange(base, base + amount);
86} 90}
87 91
92std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
93 std::array<u32, Tegra::Engines::MaxShaderTypes> max;
94 std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
95 [](GLenum pname) { return GetInteger<u32>(pname); });
96 return max;
97}
98
88std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { 99std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
89 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; 100 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
90 101
@@ -171,15 +182,14 @@ bool IsASTCSupported() {
171 182
172} // Anonymous namespace 183} // Anonymous namespace
173 184
174Device::Device() : base_bindings{BuildBaseBindings()} { 185Device::Device()
186 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
175 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 187 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
176 const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); 188 const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
177 const std::vector extensions = GetExtensions(); 189 const std::vector extensions = GetExtensions();
178 190
179 const bool is_nvidia = vendor == "NVIDIA Corporation"; 191 const bool is_nvidia = vendor == "NVIDIA Corporation";
180 const bool is_amd = vendor == "ATI Technologies Inc."; 192 const bool is_amd = vendor == "ATI Technologies Inc.";
181 const bool is_intel = vendor == "Intel";
182 const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
183 193
184 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 194 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
185 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 195 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -194,7 +204,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
194 has_variable_aoffi = TestVariableAoffi(); 204 has_variable_aoffi = TestVariableAoffi();
195 has_component_indexing_bug = is_amd; 205 has_component_indexing_bug = is_amd;
196 has_precise_bug = TestPreciseBug(); 206 has_precise_bug = TestPreciseBug();
197 has_broken_compute = is_intel_proprietary;
198 has_fast_buffer_sub_data = is_nvidia; 207 has_fast_buffer_sub_data = is_nvidia;
199 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && 208 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
200 GLAD_GL_NV_compute_program5; 209 GLAD_GL_NV_compute_program5;
@@ -209,7 +218,9 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
209} 218}
210 219
211Device::Device(std::nullptr_t) { 220Device::Device(std::nullptr_t) {
212 uniform_buffer_alignment = 0; 221 max_uniform_buffers.fill(std::numeric_limits<u32>::max());
222 uniform_buffer_alignment = 4;
223 shader_storage_alignment = 4;
213 max_vertex_attributes = 16; 224 max_vertex_attributes = 16;
214 max_varyings = 15; 225 max_varyings = 15;
215 has_warp_intrinsics = true; 226 has_warp_intrinsics = true;
@@ -217,9 +228,6 @@ Device::Device(std::nullptr_t) {
217 has_vertex_viewport_layer = true; 228 has_vertex_viewport_layer = true;
218 has_image_load_formatted = true; 229 has_image_load_formatted = true;
219 has_variable_aoffi = true; 230 has_variable_aoffi = true;
220 has_component_indexing_bug = false;
221 has_broken_compute = false;
222 has_precise_bug = false;
223} 231}
224 232
225bool Device::TestVariableAoffi() { 233bool Device::TestVariableAoffi() {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e915dbd86..98cca0254 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -24,6 +24,10 @@ public:
24 explicit Device(); 24 explicit Device();
25 explicit Device(std::nullptr_t); 25 explicit Device(std::nullptr_t);
26 26
27 u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
28 return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
29 }
30
27 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { 31 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
28 return base_bindings[stage_index]; 32 return base_bindings[stage_index];
29 } 33 }
@@ -80,10 +84,6 @@ public:
80 return has_precise_bug; 84 return has_precise_bug;
81 } 85 }
82 86
83 bool HasBrokenCompute() const {
84 return has_broken_compute;
85 }
86
87 bool HasFastBufferSubData() const { 87 bool HasFastBufferSubData() const {
88 return has_fast_buffer_sub_data; 88 return has_fast_buffer_sub_data;
89 } 89 }
@@ -96,7 +96,8 @@ private:
96 static bool TestVariableAoffi(); 96 static bool TestVariableAoffi();
97 static bool TestPreciseBug(); 97 static bool TestPreciseBug();
98 98
99 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings; 99 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
100 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
100 std::size_t uniform_buffer_alignment{}; 101 std::size_t uniform_buffer_alignment{};
101 std::size_t shader_storage_alignment{}; 102 std::size_t shader_storage_alignment{};
102 u32 max_vertex_attributes{}; 103 u32 max_vertex_attributes{};
@@ -109,7 +110,6 @@ private:
109 bool has_variable_aoffi{}; 110 bool has_variable_aoffi{};
110 bool has_component_indexing_bug{}; 111 bool has_component_indexing_bug{};
111 bool has_precise_bug{}; 112 bool has_precise_bug{};
112 bool has_broken_compute{};
113 bool has_fast_buffer_sub_data{}; 113 bool has_fast_buffer_sub_data{};
114 bool use_assembly_shaders{}; 114 bool use_assembly_shaders{};
115}; 115};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 716d43e65..55e79aaf6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -54,6 +54,12 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
54 54
55namespace { 55namespace {
56 56
57constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
58constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
59 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
60constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
61 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
62
57constexpr std::size_t NumSupportedVertexAttributes = 16; 63constexpr std::size_t NumSupportedVertexAttributes = 16;
58 64
59template <typename Engine, typename Entry> 65template <typename Engine, typename Entry>
@@ -104,6 +110,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
104 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { 110 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
105 CheckExtensions(); 111 CheckExtensions();
106 112
113 unified_uniform_buffer.Create();
114 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
115
107 if (device.UseAssemblyShaders()) { 116 if (device.UseAssemblyShaders()) {
108 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); 117 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
109 for (const GLuint cbuf : staging_cbufs) { 118 for (const GLuint cbuf : staging_cbufs) {
@@ -655,10 +664,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
655} 664}
656 665
657void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 666void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
658 if (device.HasBrokenCompute()) {
659 return;
660 }
661
662 buffer_cache.Acquire(); 667 buffer_cache.Acquire();
663 current_cbuf = 0; 668 current_cbuf = 0;
664 669
@@ -846,34 +851,56 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
846 MICROPROFILE_SCOPE(OpenGL_UBO); 851 MICROPROFILE_SCOPE(OpenGL_UBO);
847 const auto& stages = system.GPU().Maxwell3D().state.shader_stages; 852 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
848 const auto& shader_stage = stages[stage_index]; 853 const auto& shader_stage = stages[stage_index];
854 const auto& entries = shader->GetEntries();
855 const bool use_unified = entries.use_unified_uniforms;
856 const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
849 857
850 u32 binding = 858 const auto base_bindings = device.GetBaseBindings(stage_index);
851 device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer; 859 u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
852 for (const auto& entry : shader->GetEntries().const_buffers) { 860 for (const auto& entry : entries.const_buffers) {
853 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 861 const u32 index = entry.GetIndex();
854 SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); 862 const auto& buffer = shader_stage.const_buffers[index];
863 SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
864 base_unified_offset + index * Maxwell::MaxConstBufferSize);
865 ++binding;
866 }
867 if (use_unified) {
868 const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
869 entries.global_memory_entries.size());
870 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
871 base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
855 } 872 }
856} 873}
857 874
858void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { 875void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
859 MICROPROFILE_SCOPE(OpenGL_UBO); 876 MICROPROFILE_SCOPE(OpenGL_UBO);
860 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 877 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
878 const auto& entries = kernel->GetEntries();
879 const bool use_unified = entries.use_unified_uniforms;
861 880
862 u32 binding = 0; 881 u32 binding = 0;
863 for (const auto& entry : kernel->GetEntries().const_buffers) { 882 for (const auto& entry : entries.const_buffers) {
864 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 883 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
865 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 884 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
866 Tegra::Engines::ConstBufferInfo buffer; 885 Tegra::Engines::ConstBufferInfo buffer;
867 buffer.address = config.Address(); 886 buffer.address = config.Address();
868 buffer.size = config.size; 887 buffer.size = config.size;
869 buffer.enabled = mask[entry.GetIndex()]; 888 buffer.enabled = mask[entry.GetIndex()];
870 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); 889 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
890 use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
891 ++binding;
892 }
893 if (use_unified) {
894 const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
895 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
896 NUM_CONST_BUFFERS_BYTES_PER_STAGE);
871 } 897 }
872} 898}
873 899
874void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, 900void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
875 const Tegra::Engines::ConstBufferInfo& buffer, 901 const Tegra::Engines::ConstBufferInfo& buffer,
876 const ConstBufferEntry& entry) { 902 const ConstBufferEntry& entry, bool use_unified,
903 std::size_t unified_offset) {
877 if (!buffer.enabled) { 904 if (!buffer.enabled) {
878 // Set values to zero to unbind buffers 905 // Set values to zero to unbind buffers
879 if (device.UseAssemblyShaders()) { 906 if (device.UseAssemblyShaders()) {
@@ -889,20 +916,29 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
889 // UBO alignment requirements. 916 // UBO alignment requirements.
890 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); 917 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
891 918
892 const auto alignment = device.GetUniformBufferAlignment(); 919 const bool fast_upload = !use_unified && device.HasFastBufferSubData();
893 auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, 920
894 device.HasFastBufferSubData()); 921 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
895 if (!device.UseAssemblyShaders()) { 922 const GPUVAddr gpu_addr = buffer.address;
896 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); 923 auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
924
925 if (device.UseAssemblyShaders()) {
926 UNIMPLEMENTED_IF(use_unified);
927 if (offset != 0) {
928 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
929 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
930 cbuf = staging_cbuf;
931 offset = 0;
932 }
933 glBindBufferRangeNV(stage, binding, cbuf, offset, size);
897 return; 934 return;
898 } 935 }
899 if (offset != 0) { 936
900 const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; 937 if (use_unified) {
901 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); 938 glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
902 cbuf = staging_cbuf; 939 } else {
903 offset = 0; 940 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
904 } 941 }
905 glBindBufferRangeNV(stage, binding, cbuf, offset, size);
906} 942}
907 943
908void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { 944void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@@ -1024,6 +1060,26 @@ void RasterizerOpenGL::SyncViewport() {
1024 const auto& regs = gpu.regs; 1060 const auto& regs = gpu.regs;
1025 1061
1026 const bool dirty_viewport = flags[Dirty::Viewports]; 1062 const bool dirty_viewport = flags[Dirty::Viewports];
1063 const bool dirty_clip_control = flags[Dirty::ClipControl];
1064
1065 if (dirty_clip_control || flags[Dirty::FrontFace]) {
1066 flags[Dirty::FrontFace] = false;
1067
1068 GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
1069 if (regs.screen_y_control.triangle_rast_flip != 0 &&
1070 regs.viewport_transform[0].scale_y < 0.0f) {
1071 switch (mode) {
1072 case GL_CW:
1073 mode = GL_CCW;
1074 break;
1075 case GL_CCW:
1076 mode = GL_CW;
1077 break;
1078 }
1079 }
1080 glFrontFace(mode);
1081 }
1082
1027 if (dirty_viewport || flags[Dirty::ClipControl]) { 1083 if (dirty_viewport || flags[Dirty::ClipControl]) {
1028 flags[Dirty::ClipControl] = false; 1084 flags[Dirty::ClipControl] = false;
1029 1085
@@ -1121,11 +1177,6 @@ void RasterizerOpenGL::SyncCullMode() {
1121 glDisable(GL_CULL_FACE); 1177 glDisable(GL_CULL_FACE);
1122 } 1178 }
1123 } 1179 }
1124
1125 if (flags[Dirty::FrontFace]) {
1126 flags[Dirty::FrontFace] = false;
1127 glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
1128 }
1129} 1180}
1130 1181
1131void RasterizerOpenGL::SyncPrimitiveRestart() { 1182void RasterizerOpenGL::SyncPrimitiveRestart() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 87f7fe159..f5dc56a0e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -107,7 +107,8 @@ private:
107 107
108 /// Configures a constant buffer. 108 /// Configures a constant buffer.
109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
110 const ConstBufferEntry& entry); 110 const ConstBufferEntry& entry, bool use_unified,
111 std::size_t unified_offset);
111 112
112 /// Configures the current global memory entries to use for the draw command. 113 /// Configures the current global memory entries to use for the draw command.
113 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); 114 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -253,6 +254,7 @@ private:
253 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; 254 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
254 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; 255 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
255 std::size_t current_cbuf = 0; 256 std::size_t current_cbuf = 0;
257 OGLBuffer unified_uniform_buffer;
256 258
257 /// Number of commands queued to the OpenGL driver. Reseted on flush. 259 /// Number of commands queued to the OpenGL driver. Reseted on flush.
258 std::size_t num_queued_commands = 0; 260 std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4cd0f36cf..a991ca64a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -241,8 +241,9 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
241 entry.bindless_samplers = registry->GetBindlessSamplers(); 241 entry.bindless_samplers = registry->GetBindlessSamplers();
242 params.disk_cache.SaveEntry(std::move(entry)); 242 params.disk_cache.SaveEntry(std::move(entry));
243 243
244 return std::shared_ptr<CachedShader>(new CachedShader( 244 return std::shared_ptr<CachedShader>(
245 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); 245 new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
246 MakeEntries(params.device, ir, shader_type), std::move(program)));
246} 247}
247 248
248Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 249Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -265,8 +266,9 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
265 entry.bindless_samplers = registry->GetBindlessSamplers(); 266 entry.bindless_samplers = registry->GetBindlessSamplers();
266 params.disk_cache.SaveEntry(std::move(entry)); 267 params.disk_cache.SaveEntry(std::move(entry));
267 268
268 return std::shared_ptr<CachedShader>(new CachedShader( 269 return std::shared_ptr<CachedShader>(
269 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); 270 new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
271 MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
270} 272}
271 273
272Shader CachedShader::CreateFromCache(const ShaderParameters& params, 274Shader CachedShader::CreateFromCache(const ShaderParameters& params,
@@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
348 PrecompiledShader shader; 350 PrecompiledShader shader;
349 shader.program = std::move(program); 351 shader.program = std::move(program);
350 shader.registry = std::move(registry); 352 shader.registry = std::move(registry);
351 shader.entries = MakeEntries(ir); 353 shader.entries = MakeEntries(device, ir, entry.type);
352 354
353 std::scoped_lock lock{mutex}; 355 std::scoped_lock lock{mutex};
354 if (callback) { 356 if (callback) {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 253484968..502b95973 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -61,8 +61,8 @@ struct TextureDerivates {};
61using TextureArgument = std::pair<Type, Node>; 61using TextureArgument = std::pair<Type, Node>;
62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; 62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
63 63
64constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 64constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); 65constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66 66
67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt 67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint 68#define ftou floatBitsToUint
@@ -402,6 +402,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
402 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 402 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
403} 403}
404 404
405bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
406 const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
407 // We waste one UBO for emulation
408 const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
409 return num_ubos > num_available_ubos;
410}
411
405struct GenericVaryingDescription { 412struct GenericVaryingDescription {
406 std::string name; 413 std::string name;
407 u8 first_element = 0; 414 u8 first_element = 0;
@@ -412,8 +419,9 @@ class GLSLDecompiler final {
412public: 419public:
413 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, 420 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
414 ShaderType stage, std::string_view identifier, std::string_view suffix) 421 ShaderType stage, std::string_view identifier, std::string_view suffix)
415 : device{device}, ir{ir}, registry{registry}, stage{stage}, 422 : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
416 identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { 423 suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
424 UseUnifiedUniforms(device, ir, stage)} {
417 if (stage != ShaderType::Compute) { 425 if (stage != ShaderType::Compute) {
418 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); 426 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
419 } 427 }
@@ -834,12 +842,24 @@ private:
834 } 842 }
835 843
836 void DeclareConstantBuffers() { 844 void DeclareConstantBuffers() {
845 if (use_unified_uniforms) {
846 const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
847 static_cast<u32>(ir.GetGlobalMemory().size());
848 code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
849 binding);
850 code.AddLine(" uint cbufs[];");
851 code.AddLine("}};");
852 code.AddNewLine();
853 return;
854 }
855
837 u32 binding = device.GetBaseBindings(stage).uniform_buffer; 856 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
838 for (const auto& buffers : ir.GetConstantBuffers()) { 857 for (const auto [index, info] : ir.GetConstantBuffers()) {
839 const auto index = buffers.first; 858 const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
859 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
840 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, 860 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
841 GetConstBufferBlock(index)); 861 GetConstBufferBlock(index));
842 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); 862 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
843 code.AddLine("}};"); 863 code.AddLine("}};");
844 code.AddNewLine(); 864 code.AddNewLine();
845 } 865 }
@@ -1038,42 +1058,51 @@ private:
1038 1058
1039 if (const auto cbuf = std::get_if<CbufNode>(&*node)) { 1059 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1040 const Node offset = cbuf->GetOffset(); 1060 const Node offset = cbuf->GetOffset();
1061 const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
1062
1041 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { 1063 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1042 // Direct access 1064 // Direct access
1043 const u32 offset_imm = immediate->GetValue(); 1065 const u32 offset_imm = immediate->GetValue();
1044 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); 1066 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
1045 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), 1067 if (use_unified_uniforms) {
1046 offset_imm / (4 * 4), (offset_imm / 4) % 4), 1068 return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
1047 Type::Uint}; 1069 Type::Uint};
1070 } else {
1071 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1072 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1073 Type::Uint};
1074 }
1048 } 1075 }
1049 1076
1050 if (std::holds_alternative<OperationNode>(*offset)) { 1077 // Indirect access
1051 // Indirect access 1078 if (use_unified_uniforms) {
1052 const std::string final_offset = code.GenerateTemporary(); 1079 return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
1053 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); 1080 Visit(offset).AsUint()),
1081 Type::Uint};
1082 }
1054 1083
1055 if (!device.HasComponentIndexingBug()) { 1084 const std::string final_offset = code.GenerateTemporary();
1056 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), 1085 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
1057 final_offset, final_offset),
1058 Type::Uint};
1059 }
1060 1086
1061 // AMD's proprietary GLSL compiler emits ill code for variable component access. 1087 if (!device.HasComponentIndexingBug()) {
1062 // To bypass this driver bug generate 4 ifs, one per each component. 1088 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
1063 const std::string pack = code.GenerateTemporary(); 1089 final_offset, final_offset),
1064 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), 1090 Type::Uint};
1065 final_offset);
1066
1067 const std::string result = code.GenerateTemporary();
1068 code.AddLine("uint {};", result);
1069 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1070 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
1071 pack, GetSwizzle(swizzle));
1072 }
1073 return {result, Type::Uint};
1074 } 1091 }
1075 1092
1076 UNREACHABLE_MSG("Unmanaged offset node type"); 1093 // AMD's proprietary GLSL compiler emits ill code for variable component access.
1094 // To bypass this driver bug generate 4 ifs, one per each component.
1095 const std::string pack = code.GenerateTemporary();
1096 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
1097 final_offset);
1098
1099 const std::string result = code.GenerateTemporary();
1100 code.AddLine("uint {};", result);
1101 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1102 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
1103 GetSwizzle(swizzle));
1104 }
1105 return {result, Type::Uint};
1077 } 1106 }
1078 1107
1079 if (const auto gmem = std::get_if<GmemNode>(&*node)) { 1108 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
@@ -2344,7 +2373,12 @@ private:
2344 return {}; 2373 return {};
2345 } 2374 }
2346 2375
2347 Expression MemoryBarrierGL(Operation) { 2376 Expression MemoryBarrierGroup(Operation) {
2377 code.AddLine("groupMemoryBarrier();");
2378 return {};
2379 }
2380
2381 Expression MemoryBarrierGlobal(Operation) {
2348 code.AddLine("memoryBarrier();"); 2382 code.AddLine("memoryBarrier();");
2349 return {}; 2383 return {};
2350 } 2384 }
@@ -2591,7 +2625,8 @@ private:
2591 &GLSLDecompiler::ShuffleIndexed, 2625 &GLSLDecompiler::ShuffleIndexed,
2592 2626
2593 &GLSLDecompiler::Barrier, 2627 &GLSLDecompiler::Barrier,
2594 &GLSLDecompiler::MemoryBarrierGL, 2628 &GLSLDecompiler::MemoryBarrierGroup,
2629 &GLSLDecompiler::MemoryBarrierGlobal,
2595 }; 2630 };
2596 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2631 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2597 2632
@@ -2704,6 +2739,7 @@ private:
2704 const std::string_view identifier; 2739 const std::string_view identifier;
2705 const std::string_view suffix; 2740 const std::string_view suffix;
2706 const Header header; 2741 const Header header;
2742 const bool use_unified_uniforms;
2707 std::unordered_map<u8, VaryingTFB> transform_feedback; 2743 std::unordered_map<u8, VaryingTFB> transform_feedback;
2708 2744
2709 ShaderWriter code; 2745 ShaderWriter code;
@@ -2899,7 +2935,7 @@ void GLSLDecompiler::DecompileAST() {
2899 2935
2900} // Anonymous namespace 2936} // Anonymous namespace
2901 2937
2902ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { 2938ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
2903 ShaderEntries entries; 2939 ShaderEntries entries;
2904 for (const auto& cbuf : ir.GetConstantBuffers()) { 2940 for (const auto& cbuf : ir.GetConstantBuffers()) {
2905 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), 2941 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2920,6 +2956,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
2920 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; 2956 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
2921 } 2957 }
2922 entries.shader_length = ir.GetLength(); 2958 entries.shader_length = ir.GetLength();
2959 entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
2923 return entries; 2960 return entries;
2924} 2961}
2925 2962
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e8a178764..451c9689a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -53,11 +53,13 @@ struct ShaderEntries {
53 std::vector<GlobalMemoryEntry> global_memory_entries; 53 std::vector<GlobalMemoryEntry> global_memory_entries;
54 std::vector<SamplerEntry> samplers; 54 std::vector<SamplerEntry> samplers;
55 std::vector<ImageEntry> images; 55 std::vector<ImageEntry> images;
56 u32 clip_distances{};
57 std::size_t shader_length{}; 56 std::size_t shader_length{};
57 u32 clip_distances{};
58 bool use_unified_uniforms{};
58}; 59};
59 60
60ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); 61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
62 Tegra::Engines::ShaderType stage);
61 63
62std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 64std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
63 const VideoCommon::Shader::Registry& registry, 65 const VideoCommon::Shader::Registry& registry,
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6b489e6db..e7952924a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() {
753bool RendererOpenGL::Init() { 753bool RendererOpenGL::Init() {
754 if (GLAD_GL_KHR_debug) { 754 if (GLAD_GL_KHR_debug) {
755 glEnable(GL_DEBUG_OUTPUT); 755 glEnable(GL_DEBUG_OUTPUT);
756 if (Settings::values.renderer_debug) {
757 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
758 }
756 glDebugMessageCallback(DebugHandler, nullptr); 759 glDebugMessageCallback(DebugHandler, nullptr);
757 } 760 }
758 761
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 568744e3c..424278816 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
71 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); 71 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
72 72
73 u32 packed_front_face = PackFrontFace(regs.front_face); 73 u32 packed_front_face = PackFrontFace(regs.front_face);
74 if (regs.screen_y_control.triangle_rast_flip != 0 && 74 if (regs.screen_y_control.triangle_rast_flip != 0) {
75 regs.viewport_transform[0].scale_y > 0.0f) {
76 // Flip front face 75 // Flip front face
77 packed_front_face = 1 - packed_front_face; 76 packed_front_face = 1 - packed_front_face;
78 } 77 }
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 2871035f5..62e950d31 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -149,7 +149,7 @@ struct FormatTuple {
149 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F 149 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
150 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U 150 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
151 {VK_FORMAT_UNDEFINED}, // R16S 151 {VK_FORMAT_UNDEFINED}, // R16S
152 {VK_FORMAT_UNDEFINED}, // R16UI 152 {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
153 {VK_FORMAT_UNDEFINED}, // R16I 153 {VK_FORMAT_UNDEFINED}, // R16I
154 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 154 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
155 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F 155 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 750e5a0ca..9fd8ac3f6 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
73 73
74std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( 74std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
75 vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { 75 vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
76 static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, 76 static constexpr std::array formats{
77 VK_FORMAT_A8B8G8R8_UINT_PACK32, 77 VK_FORMAT_A8B8G8R8_UNORM_PACK32,
78 VK_FORMAT_A8B8G8R8_SNORM_PACK32, 78 VK_FORMAT_A8B8G8R8_UINT_PACK32,
79 VK_FORMAT_A8B8G8R8_SRGB_PACK32, 79 VK_FORMAT_A8B8G8R8_SNORM_PACK32,
80 VK_FORMAT_B5G6R5_UNORM_PACK16, 80 VK_FORMAT_A8B8G8R8_SRGB_PACK32,
81 VK_FORMAT_A2B10G10R10_UNORM_PACK32, 81 VK_FORMAT_B5G6R5_UNORM_PACK16,
82 VK_FORMAT_A1R5G5B5_UNORM_PACK16, 82 VK_FORMAT_A2B10G10R10_UNORM_PACK32,
83 VK_FORMAT_R32G32B32A32_SFLOAT, 83 VK_FORMAT_A1R5G5B5_UNORM_PACK16,
84 VK_FORMAT_R32G32B32A32_UINT, 84 VK_FORMAT_R32G32B32A32_SFLOAT,
85 VK_FORMAT_R32G32_SFLOAT, 85 VK_FORMAT_R32G32B32A32_UINT,
86 VK_FORMAT_R32G32_UINT, 86 VK_FORMAT_R32G32_SFLOAT,
87 VK_FORMAT_R16G16B16A16_UINT, 87 VK_FORMAT_R32G32_UINT,
88 VK_FORMAT_R16G16B16A16_SNORM, 88 VK_FORMAT_R16G16B16A16_UINT,
89 VK_FORMAT_R16G16B16A16_UNORM, 89 VK_FORMAT_R16G16B16A16_SNORM,
90 VK_FORMAT_R16G16_UNORM, 90 VK_FORMAT_R16G16B16A16_UNORM,
91 VK_FORMAT_R16G16_SNORM, 91 VK_FORMAT_R16G16_UNORM,
92 VK_FORMAT_R16G16_SFLOAT, 92 VK_FORMAT_R16G16_SNORM,
93 VK_FORMAT_R16_UNORM, 93 VK_FORMAT_R16G16_SFLOAT,
94 VK_FORMAT_R8G8B8A8_SRGB, 94 VK_FORMAT_R16_UNORM,
95 VK_FORMAT_R8G8_UNORM, 95 VK_FORMAT_R16_UINT,
96 VK_FORMAT_R8G8_SNORM, 96 VK_FORMAT_R8G8B8A8_SRGB,
97 VK_FORMAT_R8G8_UINT, 97 VK_FORMAT_R8G8_UNORM,
98 VK_FORMAT_R8_UNORM, 98 VK_FORMAT_R8G8_SNORM,
99 VK_FORMAT_R8_UINT, 99 VK_FORMAT_R8G8_UINT,
100 VK_FORMAT_B10G11R11_UFLOAT_PACK32, 100 VK_FORMAT_R8_UNORM,
101 VK_FORMAT_R32_SFLOAT, 101 VK_FORMAT_R8_UINT,
102 VK_FORMAT_R32_UINT, 102 VK_FORMAT_B10G11R11_UFLOAT_PACK32,
103 VK_FORMAT_R32_SINT, 103 VK_FORMAT_R32_SFLOAT,
104 VK_FORMAT_R16_SFLOAT, 104 VK_FORMAT_R32_UINT,
105 VK_FORMAT_R16G16B16A16_SFLOAT, 105 VK_FORMAT_R32_SINT,
106 VK_FORMAT_B8G8R8A8_UNORM, 106 VK_FORMAT_R16_SFLOAT,
107 VK_FORMAT_B8G8R8A8_SRGB, 107 VK_FORMAT_R16G16B16A16_SFLOAT,
108 VK_FORMAT_R4G4B4A4_UNORM_PACK16, 108 VK_FORMAT_B8G8R8A8_UNORM,
109 VK_FORMAT_D32_SFLOAT, 109 VK_FORMAT_B8G8R8A8_SRGB,
110 VK_FORMAT_D16_UNORM, 110 VK_FORMAT_R4G4B4A4_UNORM_PACK16,
111 VK_FORMAT_D16_UNORM_S8_UINT, 111 VK_FORMAT_D32_SFLOAT,
112 VK_FORMAT_D24_UNORM_S8_UINT, 112 VK_FORMAT_D16_UNORM,
113 VK_FORMAT_D32_SFLOAT_S8_UINT, 113 VK_FORMAT_D16_UNORM_S8_UINT,
114 VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 114 VK_FORMAT_D24_UNORM_S8_UINT,
115 VK_FORMAT_BC2_UNORM_BLOCK, 115 VK_FORMAT_D32_SFLOAT_S8_UINT,
116 VK_FORMAT_BC3_UNORM_BLOCK, 116 VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
117 VK_FORMAT_BC4_UNORM_BLOCK, 117 VK_FORMAT_BC2_UNORM_BLOCK,
118 VK_FORMAT_BC5_UNORM_BLOCK, 118 VK_FORMAT_BC3_UNORM_BLOCK,
119 VK_FORMAT_BC5_SNORM_BLOCK, 119 VK_FORMAT_BC4_UNORM_BLOCK,
120 VK_FORMAT_BC7_UNORM_BLOCK, 120 VK_FORMAT_BC5_UNORM_BLOCK,
121 VK_FORMAT_BC6H_UFLOAT_BLOCK, 121 VK_FORMAT_BC5_SNORM_BLOCK,
122 VK_FORMAT_BC6H_SFLOAT_BLOCK, 122 VK_FORMAT_BC7_UNORM_BLOCK,
123 VK_FORMAT_BC1_RGBA_SRGB_BLOCK, 123 VK_FORMAT_BC6H_UFLOAT_BLOCK,
124 VK_FORMAT_BC2_SRGB_BLOCK, 124 VK_FORMAT_BC6H_SFLOAT_BLOCK,
125 VK_FORMAT_BC3_SRGB_BLOCK, 125 VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
126 VK_FORMAT_BC7_SRGB_BLOCK, 126 VK_FORMAT_BC2_SRGB_BLOCK,
127 VK_FORMAT_ASTC_4x4_SRGB_BLOCK, 127 VK_FORMAT_BC3_SRGB_BLOCK,
128 VK_FORMAT_ASTC_8x8_SRGB_BLOCK, 128 VK_FORMAT_BC7_SRGB_BLOCK,
129 VK_FORMAT_ASTC_8x5_SRGB_BLOCK, 129 VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
130 VK_FORMAT_ASTC_5x4_SRGB_BLOCK, 130 VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
131 VK_FORMAT_ASTC_5x5_UNORM_BLOCK, 131 VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
132 VK_FORMAT_ASTC_5x5_SRGB_BLOCK, 132 VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
133 VK_FORMAT_ASTC_10x8_UNORM_BLOCK, 133 VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
134 VK_FORMAT_ASTC_10x8_SRGB_BLOCK, 134 VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
135 VK_FORMAT_ASTC_6x6_UNORM_BLOCK, 135 VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
136 VK_FORMAT_ASTC_6x6_SRGB_BLOCK, 136 VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
137 VK_FORMAT_ASTC_10x10_UNORM_BLOCK, 137 VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
138 VK_FORMAT_ASTC_10x10_SRGB_BLOCK, 138 VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
139 VK_FORMAT_ASTC_12x12_UNORM_BLOCK, 139 VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
140 VK_FORMAT_ASTC_12x12_SRGB_BLOCK, 140 VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
141 VK_FORMAT_ASTC_8x6_UNORM_BLOCK, 141 VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
142 VK_FORMAT_ASTC_8x6_SRGB_BLOCK, 142 VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
143 VK_FORMAT_ASTC_6x5_UNORM_BLOCK, 143 VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
144 VK_FORMAT_ASTC_6x5_SRGB_BLOCK, 144 VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
145 VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; 145 VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
146 VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
147 VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
148 };
146 std::unordered_map<VkFormat, VkFormatProperties> format_properties; 149 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
147 for (const auto format : formats) { 150 for (const auto format : formats) {
148 format_properties.emplace(format, physical.GetFormatProperties(format)); 151 format_properties.emplace(format, physical.GetFormatProperties(format));
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a5c7b7945..65a1c6245 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
312 ASSERT(point_size != 0.0f); 312 ASSERT(point_size != 0.0f);
313 } 313 }
314 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { 314 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
315 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); 315 const auto& attribute = fixed_state.vertex_input.attributes[i];
316 specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
317 specialization.attribute_types[i] = attribute.Type();
316 } 318 }
317 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 319 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
318 320
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index be5b77fae..a3d992ed3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
877 877
878 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { 878 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
879 const auto& attrib = regs.vertex_attrib_format[index]; 879 const auto& attrib = regs.vertex_attrib_format[index];
880 if (!attrib.IsValid()) { 880 if (attrib.IsConstant()) {
881 vertex_input.SetAttribute(index, false, 0, 0, {}, {}); 881 vertex_input.SetAttribute(index, false, 0, 0, {}, {});
882 continue; 882 continue;
883 } 883 }
884
885 [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
886 ASSERT(buffer.IsEnabled());
887
888 vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), 884 vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
889 attrib.size.Value()); 885 attrib.size.Value());
890 } 886 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 890f34a2c..a13e8baa7 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -741,8 +741,10 @@ private:
741 if (!IsGenericAttribute(index)) { 741 if (!IsGenericAttribute(index)) {
742 continue; 742 continue;
743 } 743 }
744
745 const u32 location = GetGenericAttributeLocation(index); 744 const u32 location = GetGenericAttributeLocation(index);
745 if (!IsAttributeEnabled(location)) {
746 continue;
747 }
746 const auto type_descriptor = GetAttributeType(location); 748 const auto type_descriptor = GetAttributeType(location);
747 Id type; 749 Id type;
748 if (IsInputAttributeArray()) { 750 if (IsInputAttributeArray()) {
@@ -986,6 +988,10 @@ private:
986 return stage == ShaderType::TesselationControl; 988 return stage == ShaderType::TesselationControl;
987 } 989 }
988 990
991 bool IsAttributeEnabled(u32 location) const {
992 return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
993 }
994
989 u32 GetNumInputVertices() const { 995 u32 GetNumInputVertices() const {
990 switch (stage) { 996 switch (stage) {
991 case ShaderType::Geometry: 997 case ShaderType::Geometry:
@@ -1201,16 +1207,20 @@ private:
1201 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); 1207 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
1202 return {v_float_zero, Type::Float}; 1208 return {v_float_zero, Type::Float};
1203 default: 1209 default:
1204 if (IsGenericAttribute(attribute)) { 1210 if (!IsGenericAttribute(attribute)) {
1205 const u32 location = GetGenericAttributeLocation(attribute); 1211 break;
1206 const auto type_descriptor = GetAttributeType(location);
1207 const Type type = type_descriptor.type;
1208 const Id attribute_id = input_attributes.at(attribute);
1209 const std::vector elements = {element};
1210 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1211 return {OpLoad(GetTypeDefinition(type), pointer), type};
1212 } 1212 }
1213 break; 1213 const u32 location = GetGenericAttributeLocation(attribute);
1214 if (!IsAttributeEnabled(location)) {
1215 // Disabled attributes (also known as constant attributes) always return zero.
1216 return {v_float_zero, Type::Float};
1217 }
1218 const auto type_descriptor = GetAttributeType(location);
1219 const Type type = type_descriptor.type;
1220 const Id attribute_id = input_attributes.at(attribute);
1221 const std::vector elements = {element};
1222 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1223 return {OpLoad(GetTypeDefinition(type), pointer), type};
1214 } 1224 }
1215 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); 1225 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
1216 return {v_float_zero, Type::Float}; 1226 return {v_float_zero, Type::Float};
@@ -2215,8 +2225,8 @@ private:
2215 return {}; 2225 return {};
2216 } 2226 }
2217 2227
2218 Expression MemoryBarrierGL(Operation) { 2228 template <spv::Scope scope>
2219 const auto scope = spv::Scope::Device; 2229 Expression MemoryBarrier(Operation) {
2220 const auto semantics = 2230 const auto semantics =
2221 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | 2231 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
2222 spv::MemorySemanticsMask::WorkgroupMemory | 2232 spv::MemorySemanticsMask::WorkgroupMemory |
@@ -2681,7 +2691,8 @@ private:
2681 &SPIRVDecompiler::ShuffleIndexed, 2691 &SPIRVDecompiler::ShuffleIndexed,
2682 2692
2683 &SPIRVDecompiler::Barrier, 2693 &SPIRVDecompiler::Barrier,
2684 &SPIRVDecompiler::MemoryBarrierGL, 2694 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
2695 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
2685 }; 2696 };
2686 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2697 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2687 2698
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f4c05ac3c..b7af26388 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -88,7 +88,8 @@ struct Specialization final {
88 u32 shared_memory_size{}; 88 u32 shared_memory_size{};
89 89
90 // Graphics specific 90 // Graphics specific
91 std::optional<float> point_size{}; 91 std::optional<float> point_size;
92 std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
92 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 93 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
93 bool ndc_minus_one_to_one{}; 94 bool ndc_minus_one_to_one{};
94}; 95};
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 694b325e1..c0a8f233f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
83 return Operation(OperationCode::YNegate); 83 return Operation(OperationCode::YNegate);
84 case SystemVariable::InvocationInfo: 84 case SystemVariable::InvocationInfo:
85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); 85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
86 return Immediate(0U); 86 return Immediate(0x00ff'0000U);
87 case SystemVariable::WscaleFactorXY: 87 case SystemVariable::WscaleFactorXY:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); 88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
89 return Immediate(0U); 89 return Immediate(0U);
@@ -299,9 +299,19 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
299 break; 299 break;
300 } 300 }
301 case OpCode::Id::MEMBAR: { 301 case OpCode::Id::MEMBAR: {
302 UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
303 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); 302 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
304 bb.push_back(Operation(OperationCode::MemoryBarrierGL)); 303 const OperationCode type = [instr] {
304 switch (instr.membar.type) {
305 case Tegra::Shader::MembarType::CTA:
306 return OperationCode::MemoryBarrierGroup;
307 case Tegra::Shader::MembarType::GL:
308 return OperationCode::MemoryBarrierGlobal;
309 default:
310 UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
311 return OperationCode::MemoryBarrierGlobal;
312 }
313 }();
314 bb.push_back(Operation(type));
305 break; 315 break;
306 } 316 }
307 case OpCode::Id::DEPBAR: { 317 case OpCode::Id::DEPBAR: {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c06512413..c5e5165ff 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -233,8 +233,9 @@ enum class OperationCode {
233 ThreadLtMask, /// () -> uint 233 ThreadLtMask, /// () -> uint
234 ShuffleIndexed, /// (uint value, uint index) -> uint 234 ShuffleIndexed, /// (uint value, uint index) -> uint
235 235
236 Barrier, /// () -> void 236 Barrier, /// () -> void
237 MemoryBarrierGL, /// () -> void 237 MemoryBarrierGroup, /// () -> void
238 MemoryBarrierGlobal, /// () -> void
238 239
239 Amount, 240 Amount,
240}; 241};
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7032e0059..f476f03b0 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
41 ComponentType alpha_component; 41 ComponentType alpha_component;
42 bool is_srgb; 42 bool is_srgb;
43}; 43};
44constexpr std::array<Table, 77> DefinitionTable = {{ 44constexpr std::array<Table, 78> DefinitionTable = {{
45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, 45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, 46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, 47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{
98 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, 98 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
99 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, 99 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
100 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, 100 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
101 {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
101 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, 102 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
102 103
103 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, 104 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8bfc541d4..45e3ddd2c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
14#include <unordered_map> 14#include <unordered_map>
15#include <vector> 15#include <vector>
16 16
17#include <boost/container/small_vector.hpp>
17#include <boost/icl/interval_map.hpp> 18#include <boost/icl/interval_map.hpp>
18#include <boost/range/iterator_range.hpp> 19#include <boost/range/iterator_range.hpp>
19 20
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
53 54
54template <typename TSurface, typename TView> 55template <typename TSurface, typename TView>
55class TextureCache { 56class TextureCache {
57 using VectorSurface = boost::container::small_vector<TSurface, 1>;
56 58
57public: 59public:
58 void InvalidateRegion(VAddr addr, std::size_t size) { 60 void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -308,18 +310,20 @@ public:
308 dst_surface.first->MarkAsModified(true, Tick()); 310 dst_surface.first->MarkAsModified(true, Tick());
309 } 311 }
310 312
311 TSurface TryFindFramebufferSurface(VAddr addr) { 313 TSurface TryFindFramebufferSurface(VAddr addr) const {
312 if (!addr) { 314 if (!addr) {
313 return nullptr; 315 return nullptr;
314 } 316 }
315 const VAddr page = addr >> registry_page_bits; 317 const VAddr page = addr >> registry_page_bits;
316 std::vector<TSurface>& list = registry[page]; 318 const auto it = registry.find(page);
317 for (auto& surface : list) { 319 if (it == registry.end()) {
318 if (surface->GetCpuAddr() == addr) { 320 return nullptr;
319 return surface;
320 }
321 } 321 }
322 return nullptr; 322 const auto& list = it->second;
323 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
324 return surface->GetCpuAddr() == addr;
325 });
326 return found != list.end() ? *found : nullptr;
323 } 327 }
324 328
325 u64 Tick() { 329 u64 Tick() {
@@ -498,7 +502,7 @@ private:
498 * @param untopological Indicates to the recycler that the texture has no way 502 * @param untopological Indicates to the recycler that the texture has no way
499 * to match the overlaps due to topological reasons. 503 * to match the overlaps due to topological reasons.
500 **/ 504 **/
501 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, 505 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
502 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 506 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
503 if (Settings::IsGPULevelExtreme()) { 507 if (Settings::IsGPULevelExtreme()) {
504 return RecycleStrategy::Flush; 508 return RecycleStrategy::Flush;
@@ -538,9 +542,8 @@ private:
538 * @param untopological Indicates to the recycler that the texture has no way to match the 542 * @param untopological Indicates to the recycler that the texture has no way to match the
539 * overlaps due to topological reasons. 543 * overlaps due to topological reasons.
540 **/ 544 **/
541 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 545 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
542 const SurfaceParams& params, const GPUVAddr gpu_addr, 546 const GPUVAddr gpu_addr, const bool preserve_contents,
543 const bool preserve_contents,
544 const MatchTopologyResult untopological) { 547 const MatchTopologyResult untopological) {
545 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); 548 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
546 for (auto& surface : overlaps) { 549 for (auto& surface : overlaps) {
@@ -650,7 +653,7 @@ private:
650 * @param params The parameters on the new surface. 653 * @param params The parameters on the new surface.
651 * @param gpu_addr The starting address of the new surface. 654 * @param gpu_addr The starting address of the new surface.
652 **/ 655 **/
653 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, 656 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
654 const SurfaceParams& params, 657 const SurfaceParams& params,
655 const GPUVAddr gpu_addr) { 658 const GPUVAddr gpu_addr) {
656 if (params.target == SurfaceTarget::Texture3D) { 659 if (params.target == SurfaceTarget::Texture3D) {
@@ -708,7 +711,7 @@ private:
708 * @param preserve_contents Indicates that the new surface should be loaded from memory or 711 * @param preserve_contents Indicates that the new surface should be loaded from memory or
709 * left blank. 712 * left blank.
710 */ 713 */
711 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 714 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
712 const SurfaceParams& params, 715 const SurfaceParams& params,
713 const GPUVAddr gpu_addr, 716 const GPUVAddr gpu_addr,
714 const VAddr cpu_addr, 717 const VAddr cpu_addr,
@@ -810,7 +813,7 @@ private:
810 TSurface& current_surface = iter->second; 813 TSurface& current_surface = iter->second;
811 const auto topological_result = current_surface->MatchesTopology(params); 814 const auto topological_result = current_surface->MatchesTopology(params);
812 if (topological_result != MatchTopologyResult::FullMatch) { 815 if (topological_result != MatchTopologyResult::FullMatch) {
813 std::vector<TSurface> overlaps{current_surface}; 816 VectorSurface overlaps{current_surface};
814 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 817 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
815 topological_result); 818 topological_result);
816 } 819 }
@@ -1126,23 +1129,25 @@ private:
1126 } 1129 }
1127 } 1130 }
1128 1131
1129 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { 1132 VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1130 if (size == 0) { 1133 if (size == 0) {
1131 return {}; 1134 return {};
1132 } 1135 }
1133 const VAddr cpu_addr_end = cpu_addr + size; 1136 const VAddr cpu_addr_end = cpu_addr + size;
1134 VAddr start = cpu_addr >> registry_page_bits;
1135 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 1137 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1136 std::vector<TSurface> surfaces; 1138 VectorSurface surfaces;
1137 while (start <= end) { 1139 for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1138 std::vector<TSurface>& list = registry[start]; 1140 const auto it = registry.find(start);
1139 for (auto& surface : list) { 1141 if (it == registry.end()) {
1140 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { 1142 continue;
1141 surface->MarkAsPicked(true); 1143 }
1142 surfaces.push_back(surface); 1144 for (auto& surface : it->second) {
1145 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
1146 continue;
1143 } 1147 }
1148 surface->MarkAsPicked(true);
1149 surfaces.push_back(surface);
1144 } 1150 }
1145 start++;
1146 } 1151 }
1147 for (auto& surface : surfaces) { 1152 for (auto& surface : surfaces) {
1148 surface->MarkAsPicked(false); 1153 surface->MarkAsPicked(false);
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 1adf8932b..1f5e43043 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -106,6 +106,9 @@ public:
106 format.setVersion(4, 3); 106 format.setVersion(4, 3);
107 format.setProfile(QSurfaceFormat::CompatibilityProfile); 107 format.setProfile(QSurfaceFormat::CompatibilityProfile);
108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); 108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
109 if (Settings::values.renderer_debug) {
110 format.setOption(QSurfaceFormat::FormatOption::DebugContext);
111 }
109 // TODO: expose a setting for buffer value (ie default/single/double/triple) 112 // TODO: expose a setting for buffer value (ie default/single/double/triple)
110 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); 113 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
111 format.setSwapInterval(0); 114 format.setSwapInterval(0);
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 411e7e647..09cc0a3b5 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); 98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); 99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); 100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
101 if (Settings::values.renderer_debug) {
102 SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
103 }
101 SDL_GL_SetSwapInterval(0); 104 SDL_GL_SetSwapInterval(0);
102 105
103 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, 106 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,