summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/hle/service/hid/hid.cpp13
-rw-r--r--src/core/hle/service/hid/hid.h1
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h40
-rw-r--r--src/video_core/engines/maxwell_3d.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp74
-rw-r--r--src/video_core/renderer_opengl/gl_device.h12
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp107
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp109
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h6
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp3
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp3
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp143
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp37
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.h3
-rw-r--r--src/video_core/shader/decode/other.cpp16
-rw-r--r--src/video_core/shader/node.h5
-rw-r--r--src/video_core/texture_cache/format_lookup_table.cpp3
-rw-r--r--src/video_core/texture_cache/texture_cache.h124
-rw-r--r--src/yuzu/bootmanager.cpp3
-rw-r--r--src/yuzu/configuration/configure_input_player.cpp3
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp3
27 files changed, 467 insertions, 276 deletions
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index c84cb1483..72a050de2 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -161,7 +161,7 @@ Hid::Hid(Core::System& system) : ServiceFramework("hid"), system(system) {
161 {40, nullptr, "AcquireXpadIdEventHandle"}, 161 {40, nullptr, "AcquireXpadIdEventHandle"},
162 {41, nullptr, "ReleaseXpadIdEventHandle"}, 162 {41, nullptr, "ReleaseXpadIdEventHandle"},
163 {51, &Hid::ActivateXpad, "ActivateXpad"}, 163 {51, &Hid::ActivateXpad, "ActivateXpad"},
164 {55, nullptr, "GetXpadIds"}, 164 {55, &Hid::GetXpadIDs, "GetXpadIds"},
165 {56, nullptr, "ActivateJoyXpad"}, 165 {56, nullptr, "ActivateJoyXpad"},
166 {58, nullptr, "GetJoyXpadLifoHandle"}, 166 {58, nullptr, "GetJoyXpadLifoHandle"},
167 {59, nullptr, "GetJoyXpadIds"}, 167 {59, nullptr, "GetJoyXpadIds"},
@@ -319,6 +319,17 @@ void Hid::ActivateXpad(Kernel::HLERequestContext& ctx) {
319 rb.Push(RESULT_SUCCESS); 319 rb.Push(RESULT_SUCCESS);
320} 320}
321 321
// Stubbed handler for the "GetXpadIds" command (registered as command id 55 in the Hid
// function table). It reads the caller's applet_resource_user_id, logs it, and replies with
// RESULT_SUCCESS followed by a single 0 value.
// NOTE(review): the trailing 0 presumably means "no Xpad ids reported" — confirm the expected
// response layout (value width, output buffer) against the HID IPC interface.
322void Hid::GetXpadIDs(Kernel::HLERequestContext& ctx) {
323 IPC::RequestParser rp{ctx};
// The id is only used for the log message below.
324 const auto applet_resource_user_id{rp.Pop<u64>()};
325
326 LOG_DEBUG(Service_HID, "(STUBBED) called, applet_resource_user_id={}", applet_resource_user_id);
327
// Builder is constructed with size 3; the two pushes below are the result code and the 0.
328 IPC::ResponseBuilder rb{ctx, 3};
329 rb.Push(RESULT_SUCCESS);
330 rb.Push(0);
331}
332
322void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) { 333void Hid::ActivateDebugPad(Kernel::HLERequestContext& ctx) {
323 IPC::RequestParser rp{ctx}; 334 IPC::RequestParser rp{ctx};
324 const auto applet_resource_user_id{rp.Pop<u64>()}; 335 const auto applet_resource_user_id{rp.Pop<u64>()};
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index c8ed4ad8b..d481a75f8 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -86,6 +86,7 @@ public:
86private: 86private:
87 void CreateAppletResource(Kernel::HLERequestContext& ctx); 87 void CreateAppletResource(Kernel::HLERequestContext& ctx);
88 void ActivateXpad(Kernel::HLERequestContext& ctx); 88 void ActivateXpad(Kernel::HLERequestContext& ctx);
89 void GetXpadIDs(Kernel::HLERequestContext& ctx);
89 void ActivateDebugPad(Kernel::HLERequestContext& ctx); 90 void ActivateDebugPad(Kernel::HLERequestContext& ctx);
90 void ActivateTouchScreen(Kernel::HLERequestContext& ctx); 91 void ActivateTouchScreen(Kernel::HLERequestContext& ctx);
91 void ActivateMouse(Kernel::HLERequestContext& ctx); 92 void ActivateMouse(Kernel::HLERequestContext& ctx);
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d9a4a1b4d..b88fce2cd 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -56,24 +56,28 @@ public:
56 if (use_fast_cbuf || size < max_stream_size) { 56 if (use_fast_cbuf || size < max_stream_size) {
57 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { 57 if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
58 auto& memory_manager = system.GPU().MemoryManager(); 58 auto& memory_manager = system.GPU().MemoryManager();
59 const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
59 if (use_fast_cbuf) { 60 if (use_fast_cbuf) {
60 if (memory_manager.IsGranularRange(gpu_addr, size)) { 61 u8* dest;
61 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 62 if (is_granular) {
62 return ConstBufferUpload(host_ptr, size); 63 dest = memory_manager.GetPointer(gpu_addr);
63 } else { 64 } else {
64 staging_buffer.resize(size); 65 staging_buffer.resize(size);
65 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); 66 dest = staging_buffer.data();
66 return ConstBufferUpload(staging_buffer.data(), size); 67 memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
67 } 68 }
69 return ConstBufferUpload(dest, size);
70 }
71 if (is_granular) {
72 u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
73 return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
74 std::memcpy(dest, host_ptr, size);
75 });
68 } else { 76 } else {
69 if (memory_manager.IsGranularRange(gpu_addr, size)) { 77 return StreamBufferUpload(
70 const auto host_ptr = memory_manager.GetPointer(gpu_addr); 78 size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
71 return StreamBufferUpload(host_ptr, size, alignment); 79 memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
72 } else { 80 });
73 staging_buffer.resize(size);
74 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
75 return StreamBufferUpload(staging_buffer.data(), size, alignment);
76 }
77 } 81 }
78 } 82 }
79 } 83 }
@@ -101,7 +105,9 @@ public:
101 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, 105 BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
102 std::size_t alignment = 4) { 106 std::size_t alignment = 4) {
103 std::lock_guard lock{mutex}; 107 std::lock_guard lock{mutex};
104 return StreamBufferUpload(raw_pointer, size, alignment); 108 return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) {
109 std::memcpy(dest, raw_pointer, size);
110 });
105 } 111 }
106 112
107 void Map(std::size_t max_size) { 113 void Map(std::size_t max_size) {
@@ -424,11 +430,11 @@ private:
424 map->MarkAsModified(false, 0); 430 map->MarkAsModified(false, 0);
425 } 431 }
426 432
427 BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, 433 template <typename Callable>
428 std::size_t alignment) { 434 BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) {
429 AlignBuffer(alignment); 435 AlignBuffer(alignment);
430 const std::size_t uploaded_offset = buffer_offset; 436 const std::size_t uploaded_offset = buffer_offset;
431 std::memcpy(buffer_ptr, raw_pointer, size); 437 callable(buffer_ptr);
432 438
433 buffer_ptr += size; 439 buffer_ptr += size;
434 buffer_offset += size; 440 buffer_offset += size;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 951016c3e..e46b153f9 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -105,7 +105,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
105 regs.rasterize_enable = 1; 105 regs.rasterize_enable = 1;
106 regs.rt_separate_frag_data = 1; 106 regs.rt_separate_frag_data = 1;
107 regs.framebuffer_srgb = 1; 107 regs.framebuffer_srgb = 1;
108 regs.line_width_aliased = 1.0f;
109 regs.line_width_smooth = 1.0f;
108 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; 110 regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise;
111 regs.polygon_mode_back = Maxwell3D::Regs::PolygonMode::Fill;
112 regs.polygon_mode_front = Maxwell3D::Regs::PolygonMode::Fill;
109 113
110 shadow_state = regs; 114 shadow_state = regs;
111 115
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 466a911db..b772c37d9 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -6,6 +6,7 @@
6#include <array> 6#include <array>
7#include <cstddef> 7#include <cstddef>
8#include <cstring> 8#include <cstring>
9#include <limits>
9#include <optional> 10#include <optional>
10#include <vector> 11#include <vector>
11 12
@@ -26,24 +27,27 @@ constexpr u32 ReservedUniformBlocks = 1;
26 27
27constexpr u32 NumStages = 5; 28constexpr u32 NumStages = 5;
28 29
29constexpr std::array LimitUBOs = {GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, 30constexpr std::array LimitUBOs = {
30 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, 31 GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
31 GL_MAX_GEOMETRY_UNIFORM_BLOCKS, GL_MAX_FRAGMENT_UNIFORM_BLOCKS}; 32 GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
33 GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
32 34
33constexpr std::array LimitSSBOs = { 35constexpr std::array LimitSSBOs = {
34 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, 36 GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
35 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, 37 GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
36 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS}; 38 GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
37 39
38constexpr std::array LimitSamplers = { 40constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
39 GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, 41 GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
40 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, 42 GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
41 GL_MAX_TEXTURE_IMAGE_UNITS}; 43 GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
44 GL_MAX_TEXTURE_IMAGE_UNITS,
45 GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
42 46
43constexpr std::array LimitImages = {GL_MAX_VERTEX_IMAGE_UNIFORMS, 47constexpr std::array LimitImages = {
44 GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, 48 GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
45 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, 49 GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
46 GL_MAX_GEOMETRY_IMAGE_UNIFORMS, GL_MAX_FRAGMENT_IMAGE_UNIFORMS}; 50 GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
47 51
48template <typename T> 52template <typename T>
49T GetInteger(GLenum pname) { 53T GetInteger(GLenum pname) {
@@ -85,6 +89,13 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
85 return std::exchange(base, base + amount); 89 return std::exchange(base, base + amount);
86} 90}
87 91
// Builds the per-shader-type table of uniform block limits by querying every enum in
// LimitUBOs through GetInteger<u32> and storing the results in the same order.
// NOTE(review): `max` is not value-initialized and std::transform writes exactly
// LimitUBOs.size() elements — this assumes LimitUBOs.size() == Tegra::Engines::MaxShaderTypes;
// confirm (a static_assert would make the assumption explicit).
92std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
93 std::array<u32, Tegra::Engines::MaxShaderTypes> max;
94 std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
95 [](GLenum pname) { return GetInteger<u32>(pname); });
96 return max;
97}
98
88std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { 99std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
89 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; 100 std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
90 101
@@ -133,6 +144,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
133} 144}
134 145
135bool IsASTCSupported() { 146bool IsASTCSupported() {
147 static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
136 static constexpr std::array formats = { 148 static constexpr std::array formats = {
137 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, 149 GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR,
138 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, 150 GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR,
@@ -149,25 +161,35 @@ bool IsASTCSupported() {
149 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, 161 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR,
150 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, 162 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR,
151 }; 163 };
152 return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) { 164 static constexpr std::array required_support = {
153 GLint supported; 165 GL_VERTEX_TEXTURE, GL_TESS_CONTROL_TEXTURE, GL_TESS_EVALUATION_TEXTURE,
154 glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, 166 GL_GEOMETRY_TEXTURE, GL_FRAGMENT_TEXTURE, GL_COMPUTE_TEXTURE,
155 &supported); 167 };
156 return supported == GL_TRUE; 168
157 }) == formats.end(); 169 for (const GLenum target : targets) {
// ASTC is reported as supported only when every (target, format, shader-stage usage)
// combination answers GL_FULL_SUPPORT. The first failing combination rejects ASTC.
169 for (const GLenum target : targets) {
170 for (const GLenum format : formats) {
171 for (const GLenum support : required_support) {
// Initialize so a query that writes nothing (e.g. on a GL error) is treated as unsupported
// instead of reading an indeterminate value.
172 GLint value = GL_NONE;
// Query the target currently being iterated; hard-coding GL_TEXTURE_2D here would leave
// GL_TEXTURE_2D_ARRAY unchecked and merely repeat the 2D queries.
173 glGetInternalformativ(target, format, support, 1, &value);
174 if (value != GL_FULL_SUPPORT) {
175 return false;
176 }
177 }
178 }
179 }
180 return true;
158} 181}
159 182
160} // Anonymous namespace 183} // Anonymous namespace
161 184
162Device::Device() : base_bindings{BuildBaseBindings()} { 185Device::Device()
186 : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
163 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); 187 const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
164 const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); 188 const auto renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
165 const std::vector extensions = GetExtensions(); 189 const std::vector extensions = GetExtensions();
166 190
167 const bool is_nvidia = vendor == "NVIDIA Corporation"; 191 const bool is_nvidia = vendor == "NVIDIA Corporation";
168 const bool is_amd = vendor == "ATI Technologies Inc."; 192 const bool is_amd = vendor == "ATI Technologies Inc.";
169 const bool is_intel = vendor == "Intel";
170 const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
171 193
172 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 194 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
173 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 195 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -182,7 +204,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
182 has_variable_aoffi = TestVariableAoffi(); 204 has_variable_aoffi = TestVariableAoffi();
183 has_component_indexing_bug = is_amd; 205 has_component_indexing_bug = is_amd;
184 has_precise_bug = TestPreciseBug(); 206 has_precise_bug = TestPreciseBug();
185 has_broken_compute = is_intel_proprietary;
186 has_fast_buffer_sub_data = is_nvidia; 207 has_fast_buffer_sub_data = is_nvidia;
187 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && 208 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
188 GLAD_GL_NV_compute_program5; 209 GLAD_GL_NV_compute_program5;
@@ -197,7 +218,9 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
197} 218}
198 219
199Device::Device(std::nullptr_t) { 220Device::Device(std::nullptr_t) {
200 uniform_buffer_alignment = 0; 221 max_uniform_buffers.fill(std::numeric_limits<u32>::max());
222 uniform_buffer_alignment = 4;
223 shader_storage_alignment = 4;
201 max_vertex_attributes = 16; 224 max_vertex_attributes = 16;
202 max_varyings = 15; 225 max_varyings = 15;
203 has_warp_intrinsics = true; 226 has_warp_intrinsics = true;
@@ -205,9 +228,6 @@ Device::Device(std::nullptr_t) {
205 has_vertex_viewport_layer = true; 228 has_vertex_viewport_layer = true;
206 has_image_load_formatted = true; 229 has_image_load_formatted = true;
207 has_variable_aoffi = true; 230 has_variable_aoffi = true;
208 has_component_indexing_bug = false;
209 has_broken_compute = false;
210 has_precise_bug = false;
211} 231}
212 232
213bool Device::TestVariableAoffi() { 233bool Device::TestVariableAoffi() {
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index e915dbd86..98cca0254 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -24,6 +24,10 @@ public:
24 explicit Device(); 24 explicit Device();
25 explicit Device(std::nullptr_t); 25 explicit Device(std::nullptr_t);
26 26
/// Returns the stored uniform buffer limit for the given shader type.
/// The enum value is converted to std::size_t and used directly as the index into
/// max_uniform_buffers; no bounds check is performed here.
27 u32 GetMaxUniformBuffers(Tegra::Engines::ShaderType shader_type) const noexcept {
28 return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
29 }
30
27 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { 31 const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
28 return base_bindings[stage_index]; 32 return base_bindings[stage_index];
29 } 33 }
@@ -80,10 +84,6 @@ public:
80 return has_precise_bug; 84 return has_precise_bug;
81 } 85 }
82 86
83 bool HasBrokenCompute() const {
84 return has_broken_compute;
85 }
86
87 bool HasFastBufferSubData() const { 87 bool HasFastBufferSubData() const {
88 return has_fast_buffer_sub_data; 88 return has_fast_buffer_sub_data;
89 } 89 }
@@ -96,7 +96,8 @@ private:
96 static bool TestVariableAoffi(); 96 static bool TestVariableAoffi();
97 static bool TestPreciseBug(); 97 static bool TestPreciseBug();
98 98
99 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings; 99 std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
100 std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
100 std::size_t uniform_buffer_alignment{}; 101 std::size_t uniform_buffer_alignment{};
101 std::size_t shader_storage_alignment{}; 102 std::size_t shader_storage_alignment{};
102 u32 max_vertex_attributes{}; 103 u32 max_vertex_attributes{};
@@ -109,7 +110,6 @@ private:
109 bool has_variable_aoffi{}; 110 bool has_variable_aoffi{};
110 bool has_component_indexing_bug{}; 111 bool has_component_indexing_bug{};
111 bool has_precise_bug{}; 112 bool has_precise_bug{};
112 bool has_broken_compute{};
113 bool has_fast_buffer_sub_data{}; 113 bool has_fast_buffer_sub_data{};
114 bool use_assembly_shaders{}; 114 bool use_assembly_shaders{};
115}; 115};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 716d43e65..55e79aaf6 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -54,6 +54,12 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
54 54
55namespace { 55namespace {
56 56
// Sizing constants for the unified uniform buffer created in the RasterizerOpenGL constructor.
// Number of const buffer slots reserved per stage.
// NOTE(review): 18 presumably matches the guest's const buffer count per stage — confirm.
57constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
// Bytes reserved per stage: one Maxwell::MaxConstBufferSize slot per const buffer. Also used as
// the stride between stages (stage_index * this value) and as the bound range size.
58constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
59 NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
// Total storage size passed to glNamedBufferStorage for the unified buffer (all stages).
60constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
61 NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
62
57constexpr std::size_t NumSupportedVertexAttributes = 16; 63constexpr std::size_t NumSupportedVertexAttributes = 16;
58 64
59template <typename Engine, typename Entry> 65template <typename Engine, typename Entry>
@@ -104,6 +110,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
104 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { 110 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
105 CheckExtensions(); 111 CheckExtensions();
106 112
113 unified_uniform_buffer.Create();
114 glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
115
107 if (device.UseAssemblyShaders()) { 116 if (device.UseAssemblyShaders()) {
108 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); 117 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
109 for (const GLuint cbuf : staging_cbufs) { 118 for (const GLuint cbuf : staging_cbufs) {
@@ -655,10 +664,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
655} 664}
656 665
657void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 666void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
658 if (device.HasBrokenCompute()) {
659 return;
660 }
661
662 buffer_cache.Acquire(); 667 buffer_cache.Acquire();
663 current_cbuf = 0; 668 current_cbuf = 0;
664 669
@@ -846,34 +851,56 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
846 MICROPROFILE_SCOPE(OpenGL_UBO); 851 MICROPROFILE_SCOPE(OpenGL_UBO);
847 const auto& stages = system.GPU().Maxwell3D().state.shader_stages; 852 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
848 const auto& shader_stage = stages[stage_index]; 853 const auto& shader_stage = stages[stage_index];
854 const auto& entries = shader->GetEntries();
855 const bool use_unified = entries.use_unified_uniforms;
856 const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE;
849 857
850 u32 binding = 858 const auto base_bindings = device.GetBaseBindings(stage_index);
851 device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer; 859 u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer;
852 for (const auto& entry : shader->GetEntries().const_buffers) { 860 for (const auto& entry : entries.const_buffers) {
853 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 861 const u32 index = entry.GetIndex();
854 SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry); 862 const auto& buffer = shader_stage.const_buffers[index];
863 SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified,
864 base_unified_offset + index * Maxwell::MaxConstBufferSize);
865 ++binding;
866 }
867 if (use_unified) {
868 const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer +
869 entries.global_memory_entries.size());
870 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle,
871 base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE);
855 } 872 }
856} 873}
857 874
858void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { 875void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
859 MICROPROFILE_SCOPE(OpenGL_UBO); 876 MICROPROFILE_SCOPE(OpenGL_UBO);
860 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 877 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
878 const auto& entries = kernel->GetEntries();
879 const bool use_unified = entries.use_unified_uniforms;
861 880
862 u32 binding = 0; 881 u32 binding = 0;
863 for (const auto& entry : kernel->GetEntries().const_buffers) { 882 for (const auto& entry : entries.const_buffers) {
864 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; 883 const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
865 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); 884 const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
866 Tegra::Engines::ConstBufferInfo buffer; 885 Tegra::Engines::ConstBufferInfo buffer;
867 buffer.address = config.Address(); 886 buffer.address = config.Address();
868 buffer.size = config.size; 887 buffer.size = config.size;
869 buffer.enabled = mask[entry.GetIndex()]; 888 buffer.enabled = mask[entry.GetIndex()];
870 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry); 889 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry,
890 use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize);
891 ++binding;
892 }
893 if (use_unified) {
894 const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size());
895 glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0,
896 NUM_CONST_BUFFERS_BYTES_PER_STAGE);
871 } 897 }
872} 898}
873 899
874void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, 900void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
875 const Tegra::Engines::ConstBufferInfo& buffer, 901 const Tegra::Engines::ConstBufferInfo& buffer,
876 const ConstBufferEntry& entry) { 902 const ConstBufferEntry& entry, bool use_unified,
903 std::size_t unified_offset) {
877 if (!buffer.enabled) { 904 if (!buffer.enabled) {
878 // Set values to zero to unbind buffers 905 // Set values to zero to unbind buffers
879 if (device.UseAssemblyShaders()) { 906 if (device.UseAssemblyShaders()) {
@@ -889,20 +916,29 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
889 // UBO alignment requirements. 916 // UBO alignment requirements.
890 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); 917 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
891 918
892 const auto alignment = device.GetUniformBufferAlignment(); 919 const bool fast_upload = !use_unified && device.HasFastBufferSubData();
893 auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, 920
894 device.HasFastBufferSubData()); 921 const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment();
895 if (!device.UseAssemblyShaders()) { 922 const GPUVAddr gpu_addr = buffer.address;
896 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); 923 auto [cbuf, offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload);
924
925 if (device.UseAssemblyShaders()) {
926 UNIMPLEMENTED_IF(use_unified);
927 if (offset != 0) {
928 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
929 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
930 cbuf = staging_cbuf;
931 offset = 0;
932 }
933 glBindBufferRangeNV(stage, binding, cbuf, offset, size);
897 return; 934 return;
898 } 935 }
899 if (offset != 0) { 936
900 const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; 937 if (use_unified) {
901 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size); 938 glCopyNamedBufferSubData(cbuf, unified_uniform_buffer.handle, offset, unified_offset, size);
902 cbuf = staging_cbuf; 939 } else {
903 offset = 0; 940 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
904 } 941 }
905 glBindBufferRangeNV(stage, binding, cbuf, offset, size);
906} 942}
907 943
908void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { 944void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@@ -1024,6 +1060,26 @@ void RasterizerOpenGL::SyncViewport() {
1024 const auto& regs = gpu.regs; 1060 const auto& regs = gpu.regs;
1025 1061
1026 const bool dirty_viewport = flags[Dirty::Viewports]; 1062 const bool dirty_viewport = flags[Dirty::Viewports];
1063 const bool dirty_clip_control = flags[Dirty::ClipControl];
1064
1065 if (dirty_clip_control || flags[Dirty::FrontFace]) {
1066 flags[Dirty::FrontFace] = false;
1067
1068 GLenum mode = MaxwellToGL::FrontFace(regs.front_face);
1069 if (regs.screen_y_control.triangle_rast_flip != 0 &&
1070 regs.viewport_transform[0].scale_y < 0.0f) {
1071 switch (mode) {
1072 case GL_CW:
1073 mode = GL_CCW;
1074 break;
1075 case GL_CCW:
1076 mode = GL_CW;
1077 break;
1078 }
1079 }
1080 glFrontFace(mode);
1081 }
1082
1027 if (dirty_viewport || flags[Dirty::ClipControl]) { 1083 if (dirty_viewport || flags[Dirty::ClipControl]) {
1028 flags[Dirty::ClipControl] = false; 1084 flags[Dirty::ClipControl] = false;
1029 1085
@@ -1121,11 +1177,6 @@ void RasterizerOpenGL::SyncCullMode() {
1121 glDisable(GL_CULL_FACE); 1177 glDisable(GL_CULL_FACE);
1122 } 1178 }
1123 } 1179 }
1124
1125 if (flags[Dirty::FrontFace]) {
1126 flags[Dirty::FrontFace] = false;
1127 glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
1128 }
1129} 1180}
1130 1181
1131void RasterizerOpenGL::SyncPrimitiveRestart() { 1182void RasterizerOpenGL::SyncPrimitiveRestart() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 87f7fe159..f5dc56a0e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -107,7 +107,8 @@ private:
107 107
108 /// Configures a constant buffer. 108 /// Configures a constant buffer.
109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
110 const ConstBufferEntry& entry); 110 const ConstBufferEntry& entry, bool use_unified,
111 std::size_t unified_offset);
111 112
112 /// Configures the current global memory entries to use for the draw command. 113 /// Configures the current global memory entries to use for the draw command.
113 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); 114 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -253,6 +254,7 @@ private:
253 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; 254 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
254 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; 255 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
255 std::size_t current_cbuf = 0; 256 std::size_t current_cbuf = 0;
257 OGLBuffer unified_uniform_buffer;
256 258
257 /// Number of commands queued to the OpenGL driver. Reseted on flush. 259 /// Number of commands queued to the OpenGL driver. Reseted on flush.
258 std::size_t num_queued_commands = 0; 260 std::size_t num_queued_commands = 0;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4cd0f36cf..a991ca64a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -241,8 +241,9 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
241 entry.bindless_samplers = registry->GetBindlessSamplers(); 241 entry.bindless_samplers = registry->GetBindlessSamplers();
242 params.disk_cache.SaveEntry(std::move(entry)); 242 params.disk_cache.SaveEntry(std::move(entry));
243 243
244 return std::shared_ptr<CachedShader>(new CachedShader( 244 return std::shared_ptr<CachedShader>(
245 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); 245 new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
246 MakeEntries(params.device, ir, shader_type), std::move(program)));
246} 247}
247 248
248Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 249Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
@@ -265,8 +266,9 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
265 entry.bindless_samplers = registry->GetBindlessSamplers(); 266 entry.bindless_samplers = registry->GetBindlessSamplers();
266 params.disk_cache.SaveEntry(std::move(entry)); 267 params.disk_cache.SaveEntry(std::move(entry));
267 268
268 return std::shared_ptr<CachedShader>(new CachedShader( 269 return std::shared_ptr<CachedShader>(
269 params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); 270 new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
271 MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
270} 272}
271 273
272Shader CachedShader::CreateFromCache(const ShaderParameters& params, 274Shader CachedShader::CreateFromCache(const ShaderParameters& params,
@@ -348,7 +350,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
348 PrecompiledShader shader; 350 PrecompiledShader shader;
349 shader.program = std::move(program); 351 shader.program = std::move(program);
350 shader.registry = std::move(registry); 352 shader.registry = std::move(registry);
351 shader.entries = MakeEntries(ir); 353 shader.entries = MakeEntries(device, ir, entry.type);
352 354
353 std::scoped_lock lock{mutex}; 355 std::scoped_lock lock{mutex};
354 if (callback) { 356 if (callback) {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 253484968..502b95973 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -61,8 +61,8 @@ struct TextureDerivates {};
61using TextureArgument = std::pair<Type, Node>; 61using TextureArgument = std::pair<Type, Node>;
62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>; 62using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
63 63
64constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 64constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32);
65 static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); 65constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32);
66 66
67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt 67constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
68#define ftou floatBitsToUint 68#define ftou floatBitsToUint
@@ -402,6 +402,13 @@ std::string FlowStackTopName(MetaStackClass stack) {
402 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); 402 return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
403} 403}
404 404
405bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) {
406 const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size());
407 // We waste one UBO for emulation
408 const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1;
409 return num_ubos > num_available_ubos;
410}
411
405struct GenericVaryingDescription { 412struct GenericVaryingDescription {
406 std::string name; 413 std::string name;
407 u8 first_element = 0; 414 u8 first_element = 0;
@@ -412,8 +419,9 @@ class GLSLDecompiler final {
412public: 419public:
413 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, 420 explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
414 ShaderType stage, std::string_view identifier, std::string_view suffix) 421 ShaderType stage, std::string_view identifier, std::string_view suffix)
415 : device{device}, ir{ir}, registry{registry}, stage{stage}, 422 : device{device}, ir{ir}, registry{registry}, stage{stage}, identifier{identifier},
416 identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { 423 suffix{suffix}, header{ir.GetHeader()}, use_unified_uniforms{
424 UseUnifiedUniforms(device, ir, stage)} {
417 if (stage != ShaderType::Compute) { 425 if (stage != ShaderType::Compute) {
418 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); 426 transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
419 } 427 }
@@ -834,12 +842,24 @@ private:
834 } 842 }
835 843
836 void DeclareConstantBuffers() { 844 void DeclareConstantBuffers() {
845 if (use_unified_uniforms) {
846 const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer +
847 static_cast<u32>(ir.GetGlobalMemory().size());
848 code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{",
849 binding);
850 code.AddLine(" uint cbufs[];");
851 code.AddLine("}};");
852 code.AddNewLine();
853 return;
854 }
855
837 u32 binding = device.GetBaseBindings(stage).uniform_buffer; 856 u32 binding = device.GetBaseBindings(stage).uniform_buffer;
838 for (const auto& buffers : ir.GetConstantBuffers()) { 857 for (const auto [index, info] : ir.GetConstantBuffers()) {
839 const auto index = buffers.first; 858 const u32 num_elements = Common::AlignUp(info.GetSize(), 4) / 4;
859 const u32 size = info.IsIndirect() ? MAX_CONSTBUFFER_ELEMENTS : num_elements;
840 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, 860 code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++,
841 GetConstBufferBlock(index)); 861 GetConstBufferBlock(index));
842 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); 862 code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), size);
843 code.AddLine("}};"); 863 code.AddLine("}};");
844 code.AddNewLine(); 864 code.AddNewLine();
845 } 865 }
@@ -1038,42 +1058,51 @@ private:
1038 1058
1039 if (const auto cbuf = std::get_if<CbufNode>(&*node)) { 1059 if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
1040 const Node offset = cbuf->GetOffset(); 1060 const Node offset = cbuf->GetOffset();
1061 const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS;
1062
1041 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { 1063 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
1042 // Direct access 1064 // Direct access
1043 const u32 offset_imm = immediate->GetValue(); 1065 const u32 offset_imm = immediate->GetValue();
1044 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); 1066 ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
1045 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), 1067 if (use_unified_uniforms) {
1046 offset_imm / (4 * 4), (offset_imm / 4) % 4), 1068 return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4),
1047 Type::Uint}; 1069 Type::Uint};
1070 } else {
1071 return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
1072 offset_imm / (4 * 4), (offset_imm / 4) % 4),
1073 Type::Uint};
1074 }
1048 } 1075 }
1049 1076
1050 if (std::holds_alternative<OperationNode>(*offset)) { 1077 // Indirect access
1051 // Indirect access 1078 if (use_unified_uniforms) {
1052 const std::string final_offset = code.GenerateTemporary(); 1079 return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset,
1053 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); 1080 Visit(offset).AsUint()),
1081 Type::Uint};
1082 }
1054 1083
1055 if (!device.HasComponentIndexingBug()) { 1084 const std::string final_offset = code.GenerateTemporary();
1056 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), 1085 code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
1057 final_offset, final_offset),
1058 Type::Uint};
1059 }
1060 1086
1061 // AMD's proprietary GLSL compiler emits ill code for variable component access. 1087 if (!device.HasComponentIndexingBug()) {
1062 // To bypass this driver bug generate 4 ifs, one per each component. 1088 return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
1063 const std::string pack = code.GenerateTemporary(); 1089 final_offset, final_offset),
1064 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), 1090 Type::Uint};
1065 final_offset);
1066
1067 const std::string result = code.GenerateTemporary();
1068 code.AddLine("uint {};", result);
1069 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1070 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
1071 pack, GetSwizzle(swizzle));
1072 }
1073 return {result, Type::Uint};
1074 } 1091 }
1075 1092
1076 UNREACHABLE_MSG("Unmanaged offset node type"); 1093 // AMD's proprietary GLSL compiler emits ill code for variable component access.
1094 // To bypass this driver bug generate 4 ifs, one per each component.
1095 const std::string pack = code.GenerateTemporary();
1096 code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
1097 final_offset);
1098
1099 const std::string result = code.GenerateTemporary();
1100 code.AddLine("uint {};", result);
1101 for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
1102 code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, pack,
1103 GetSwizzle(swizzle));
1104 }
1105 return {result, Type::Uint};
1077 } 1106 }
1078 1107
1079 if (const auto gmem = std::get_if<GmemNode>(&*node)) { 1108 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
@@ -2344,7 +2373,12 @@ private:
2344 return {}; 2373 return {};
2345 } 2374 }
2346 2375
2347 Expression MemoryBarrierGL(Operation) { 2376 Expression MemoryBarrierGroup(Operation) {
2377 code.AddLine("groupMemoryBarrier();");
2378 return {};
2379 }
2380
2381 Expression MemoryBarrierGlobal(Operation) {
2348 code.AddLine("memoryBarrier();"); 2382 code.AddLine("memoryBarrier();");
2349 return {}; 2383 return {};
2350 } 2384 }
@@ -2591,7 +2625,8 @@ private:
2591 &GLSLDecompiler::ShuffleIndexed, 2625 &GLSLDecompiler::ShuffleIndexed,
2592 2626
2593 &GLSLDecompiler::Barrier, 2627 &GLSLDecompiler::Barrier,
2594 &GLSLDecompiler::MemoryBarrierGL, 2628 &GLSLDecompiler::MemoryBarrierGroup,
2629 &GLSLDecompiler::MemoryBarrierGlobal,
2595 }; 2630 };
2596 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2631 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2597 2632
@@ -2704,6 +2739,7 @@ private:
2704 const std::string_view identifier; 2739 const std::string_view identifier;
2705 const std::string_view suffix; 2740 const std::string_view suffix;
2706 const Header header; 2741 const Header header;
2742 const bool use_unified_uniforms;
2707 std::unordered_map<u8, VaryingTFB> transform_feedback; 2743 std::unordered_map<u8, VaryingTFB> transform_feedback;
2708 2744
2709 ShaderWriter code; 2745 ShaderWriter code;
@@ -2899,7 +2935,7 @@ void GLSLDecompiler::DecompileAST() {
2899 2935
2900} // Anonymous namespace 2936} // Anonymous namespace
2901 2937
2902ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { 2938ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType stage) {
2903 ShaderEntries entries; 2939 ShaderEntries entries;
2904 for (const auto& cbuf : ir.GetConstantBuffers()) { 2940 for (const auto& cbuf : ir.GetConstantBuffers()) {
2905 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), 2941 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2920,6 +2956,7 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
2920 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; 2956 entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
2921 } 2957 }
2922 entries.shader_length = ir.GetLength(); 2958 entries.shader_length = ir.GetLength();
2959 entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage);
2923 return entries; 2960 return entries;
2924} 2961}
2925 2962
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e8a178764..451c9689a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -53,11 +53,13 @@ struct ShaderEntries {
53 std::vector<GlobalMemoryEntry> global_memory_entries; 53 std::vector<GlobalMemoryEntry> global_memory_entries;
54 std::vector<SamplerEntry> samplers; 54 std::vector<SamplerEntry> samplers;
55 std::vector<ImageEntry> images; 55 std::vector<ImageEntry> images;
56 u32 clip_distances{};
57 std::size_t shader_length{}; 56 std::size_t shader_length{};
57 u32 clip_distances{};
58 bool use_unified_uniforms{};
58}; 59};
59 60
60ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); 61ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
62 Tegra::Engines::ShaderType stage);
61 63
62std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 64std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
63 const VideoCommon::Shader::Registry& registry, 65 const VideoCommon::Shader::Registry& registry,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 4faa8b90c..57db5a08b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -404,8 +404,7 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
404 404
405CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, 405CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
406 bool is_proxy) 406 bool is_proxy)
407 : VideoCommon::ViewBase(params), surface{surface}, 407 : VideoCommon::ViewBase(params), surface{surface}, format{surface.internal_format},
408 format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
409 target{GetTextureTarget(params.target)}, is_proxy{is_proxy} { 408 target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
410 if (!is_proxy) { 409 if (!is_proxy) {
411 main_view = CreateTextureView(); 410 main_view = CreateTextureView();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 6b489e6db..e7952924a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() {
753bool RendererOpenGL::Init() { 753bool RendererOpenGL::Init() {
754 if (GLAD_GL_KHR_debug) { 754 if (GLAD_GL_KHR_debug) {
755 glEnable(GL_DEBUG_OUTPUT); 755 glEnable(GL_DEBUG_OUTPUT);
756 if (Settings::values.renderer_debug) {
757 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
758 }
756 glDebugMessageCallback(DebugHandler, nullptr); 759 glDebugMessageCallback(DebugHandler, nullptr);
757 } 760 }
758 761
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 568744e3c..424278816 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -71,8 +71,7 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept {
71 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); 71 const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
72 72
73 u32 packed_front_face = PackFrontFace(regs.front_face); 73 u32 packed_front_face = PackFrontFace(regs.front_face);
74 if (regs.screen_y_control.triangle_rast_flip != 0 && 74 if (regs.screen_y_control.triangle_rast_flip != 0) {
75 regs.viewport_transform[0].scale_y > 0.0f) {
76 // Flip front face 75 // Flip front face
77 packed_front_face = 1 - packed_front_face; 76 packed_front_face = 1 - packed_front_face;
78 } 77 }
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 2871035f5..62e950d31 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -149,7 +149,7 @@ struct FormatTuple {
149 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F 149 {VK_FORMAT_R16_SFLOAT, Attachable | Storage}, // R16F
150 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U 150 {VK_FORMAT_R16_UNORM, Attachable | Storage}, // R16U
151 {VK_FORMAT_UNDEFINED}, // R16S 151 {VK_FORMAT_UNDEFINED}, // R16S
152 {VK_FORMAT_UNDEFINED}, // R16UI 152 {VK_FORMAT_R16_UINT, Attachable | Storage}, // R16UI
153 {VK_FORMAT_UNDEFINED}, // R16I 153 {VK_FORMAT_UNDEFINED}, // R16I
154 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16 154 {VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // RG16
155 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F 155 {VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // RG16F
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 750e5a0ca..9fd8ac3f6 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -73,76 +73,79 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
73 73
74std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( 74std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
75 vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { 75 vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
76 static constexpr std::array formats{VK_FORMAT_A8B8G8R8_UNORM_PACK32, 76 static constexpr std::array formats{
77 VK_FORMAT_A8B8G8R8_UINT_PACK32, 77 VK_FORMAT_A8B8G8R8_UNORM_PACK32,
78 VK_FORMAT_A8B8G8R8_SNORM_PACK32, 78 VK_FORMAT_A8B8G8R8_UINT_PACK32,
79 VK_FORMAT_A8B8G8R8_SRGB_PACK32, 79 VK_FORMAT_A8B8G8R8_SNORM_PACK32,
80 VK_FORMAT_B5G6R5_UNORM_PACK16, 80 VK_FORMAT_A8B8G8R8_SRGB_PACK32,
81 VK_FORMAT_A2B10G10R10_UNORM_PACK32, 81 VK_FORMAT_B5G6R5_UNORM_PACK16,
82 VK_FORMAT_A1R5G5B5_UNORM_PACK16, 82 VK_FORMAT_A2B10G10R10_UNORM_PACK32,
83 VK_FORMAT_R32G32B32A32_SFLOAT, 83 VK_FORMAT_A1R5G5B5_UNORM_PACK16,
84 VK_FORMAT_R32G32B32A32_UINT, 84 VK_FORMAT_R32G32B32A32_SFLOAT,
85 VK_FORMAT_R32G32_SFLOAT, 85 VK_FORMAT_R32G32B32A32_UINT,
86 VK_FORMAT_R32G32_UINT, 86 VK_FORMAT_R32G32_SFLOAT,
87 VK_FORMAT_R16G16B16A16_UINT, 87 VK_FORMAT_R32G32_UINT,
88 VK_FORMAT_R16G16B16A16_SNORM, 88 VK_FORMAT_R16G16B16A16_UINT,
89 VK_FORMAT_R16G16B16A16_UNORM, 89 VK_FORMAT_R16G16B16A16_SNORM,
90 VK_FORMAT_R16G16_UNORM, 90 VK_FORMAT_R16G16B16A16_UNORM,
91 VK_FORMAT_R16G16_SNORM, 91 VK_FORMAT_R16G16_UNORM,
92 VK_FORMAT_R16G16_SFLOAT, 92 VK_FORMAT_R16G16_SNORM,
93 VK_FORMAT_R16_UNORM, 93 VK_FORMAT_R16G16_SFLOAT,
94 VK_FORMAT_R8G8B8A8_SRGB, 94 VK_FORMAT_R16_UNORM,
95 VK_FORMAT_R8G8_UNORM, 95 VK_FORMAT_R16_UINT,
96 VK_FORMAT_R8G8_SNORM, 96 VK_FORMAT_R8G8B8A8_SRGB,
97 VK_FORMAT_R8G8_UINT, 97 VK_FORMAT_R8G8_UNORM,
98 VK_FORMAT_R8_UNORM, 98 VK_FORMAT_R8G8_SNORM,
99 VK_FORMAT_R8_UINT, 99 VK_FORMAT_R8G8_UINT,
100 VK_FORMAT_B10G11R11_UFLOAT_PACK32, 100 VK_FORMAT_R8_UNORM,
101 VK_FORMAT_R32_SFLOAT, 101 VK_FORMAT_R8_UINT,
102 VK_FORMAT_R32_UINT, 102 VK_FORMAT_B10G11R11_UFLOAT_PACK32,
103 VK_FORMAT_R32_SINT, 103 VK_FORMAT_R32_SFLOAT,
104 VK_FORMAT_R16_SFLOAT, 104 VK_FORMAT_R32_UINT,
105 VK_FORMAT_R16G16B16A16_SFLOAT, 105 VK_FORMAT_R32_SINT,
106 VK_FORMAT_B8G8R8A8_UNORM, 106 VK_FORMAT_R16_SFLOAT,
107 VK_FORMAT_B8G8R8A8_SRGB, 107 VK_FORMAT_R16G16B16A16_SFLOAT,
108 VK_FORMAT_R4G4B4A4_UNORM_PACK16, 108 VK_FORMAT_B8G8R8A8_UNORM,
109 VK_FORMAT_D32_SFLOAT, 109 VK_FORMAT_B8G8R8A8_SRGB,
110 VK_FORMAT_D16_UNORM, 110 VK_FORMAT_R4G4B4A4_UNORM_PACK16,
111 VK_FORMAT_D16_UNORM_S8_UINT, 111 VK_FORMAT_D32_SFLOAT,
112 VK_FORMAT_D24_UNORM_S8_UINT, 112 VK_FORMAT_D16_UNORM,
113 VK_FORMAT_D32_SFLOAT_S8_UINT, 113 VK_FORMAT_D16_UNORM_S8_UINT,
114 VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 114 VK_FORMAT_D24_UNORM_S8_UINT,
115 VK_FORMAT_BC2_UNORM_BLOCK, 115 VK_FORMAT_D32_SFLOAT_S8_UINT,
116 VK_FORMAT_BC3_UNORM_BLOCK, 116 VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
117 VK_FORMAT_BC4_UNORM_BLOCK, 117 VK_FORMAT_BC2_UNORM_BLOCK,
118 VK_FORMAT_BC5_UNORM_BLOCK, 118 VK_FORMAT_BC3_UNORM_BLOCK,
119 VK_FORMAT_BC5_SNORM_BLOCK, 119 VK_FORMAT_BC4_UNORM_BLOCK,
120 VK_FORMAT_BC7_UNORM_BLOCK, 120 VK_FORMAT_BC5_UNORM_BLOCK,
121 VK_FORMAT_BC6H_UFLOAT_BLOCK, 121 VK_FORMAT_BC5_SNORM_BLOCK,
122 VK_FORMAT_BC6H_SFLOAT_BLOCK, 122 VK_FORMAT_BC7_UNORM_BLOCK,
123 VK_FORMAT_BC1_RGBA_SRGB_BLOCK, 123 VK_FORMAT_BC6H_UFLOAT_BLOCK,
124 VK_FORMAT_BC2_SRGB_BLOCK, 124 VK_FORMAT_BC6H_SFLOAT_BLOCK,
125 VK_FORMAT_BC3_SRGB_BLOCK, 125 VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
126 VK_FORMAT_BC7_SRGB_BLOCK, 126 VK_FORMAT_BC2_SRGB_BLOCK,
127 VK_FORMAT_ASTC_4x4_SRGB_BLOCK, 127 VK_FORMAT_BC3_SRGB_BLOCK,
128 VK_FORMAT_ASTC_8x8_SRGB_BLOCK, 128 VK_FORMAT_BC7_SRGB_BLOCK,
129 VK_FORMAT_ASTC_8x5_SRGB_BLOCK, 129 VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
130 VK_FORMAT_ASTC_5x4_SRGB_BLOCK, 130 VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
131 VK_FORMAT_ASTC_5x5_UNORM_BLOCK, 131 VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
132 VK_FORMAT_ASTC_5x5_SRGB_BLOCK, 132 VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
133 VK_FORMAT_ASTC_10x8_UNORM_BLOCK, 133 VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
134 VK_FORMAT_ASTC_10x8_SRGB_BLOCK, 134 VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
135 VK_FORMAT_ASTC_6x6_UNORM_BLOCK, 135 VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
136 VK_FORMAT_ASTC_6x6_SRGB_BLOCK, 136 VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
137 VK_FORMAT_ASTC_10x10_UNORM_BLOCK, 137 VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
138 VK_FORMAT_ASTC_10x10_SRGB_BLOCK, 138 VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
139 VK_FORMAT_ASTC_12x12_UNORM_BLOCK, 139 VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
140 VK_FORMAT_ASTC_12x12_SRGB_BLOCK, 140 VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
141 VK_FORMAT_ASTC_8x6_UNORM_BLOCK, 141 VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
142 VK_FORMAT_ASTC_8x6_SRGB_BLOCK, 142 VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
143 VK_FORMAT_ASTC_6x5_UNORM_BLOCK, 143 VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
144 VK_FORMAT_ASTC_6x5_SRGB_BLOCK, 144 VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
145 VK_FORMAT_E5B9G9R9_UFLOAT_PACK32}; 145 VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
146 VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
147 VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
148 };
146 std::unordered_map<VkFormat, VkFormatProperties> format_properties; 149 std::unordered_map<VkFormat, VkFormatProperties> format_properties;
147 for (const auto format : formats) { 150 for (const auto format : formats) {
148 format_properties.emplace(format, physical.GetFormatProperties(format)); 151 format_properties.emplace(format, physical.GetFormatProperties(format));
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a5c7b7945..65a1c6245 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -312,7 +312,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
312 ASSERT(point_size != 0.0f); 312 ASSERT(point_size != 0.0f);
313 } 313 }
314 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { 314 for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
315 specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); 315 const auto& attribute = fixed_state.vertex_input.attributes[i];
316 specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
317 specialization.attribute_types[i] = attribute.Type();
316 } 318 }
317 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; 319 specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
318 320
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index be5b77fae..a3d992ed3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -877,14 +877,10 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex
877 877
878 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { 878 for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
879 const auto& attrib = regs.vertex_attrib_format[index]; 879 const auto& attrib = regs.vertex_attrib_format[index];
880 if (!attrib.IsValid()) { 880 if (attrib.IsConstant()) {
881 vertex_input.SetAttribute(index, false, 0, 0, {}, {}); 881 vertex_input.SetAttribute(index, false, 0, 0, {}, {});
882 continue; 882 continue;
883 } 883 }
884
885 [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer];
886 ASSERT(buffer.IsEnabled());
887
888 vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), 884 vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(),
889 attrib.size.Value()); 885 attrib.size.Value());
890 } 886 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 890f34a2c..a13e8baa7 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -741,8 +741,10 @@ private:
741 if (!IsGenericAttribute(index)) { 741 if (!IsGenericAttribute(index)) {
742 continue; 742 continue;
743 } 743 }
744
745 const u32 location = GetGenericAttributeLocation(index); 744 const u32 location = GetGenericAttributeLocation(index);
745 if (!IsAttributeEnabled(location)) {
746 continue;
747 }
746 const auto type_descriptor = GetAttributeType(location); 748 const auto type_descriptor = GetAttributeType(location);
747 Id type; 749 Id type;
748 if (IsInputAttributeArray()) { 750 if (IsInputAttributeArray()) {
@@ -986,6 +988,10 @@ private:
986 return stage == ShaderType::TesselationControl; 988 return stage == ShaderType::TesselationControl;
987 } 989 }
988 990
991 bool IsAttributeEnabled(u32 location) const {
992 return stage != ShaderType::Vertex || specialization.enabled_attributes[location];
993 }
994
989 u32 GetNumInputVertices() const { 995 u32 GetNumInputVertices() const {
990 switch (stage) { 996 switch (stage) {
991 case ShaderType::Geometry: 997 case ShaderType::Geometry:
@@ -1201,16 +1207,20 @@ private:
1201 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element); 1207 UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
1202 return {v_float_zero, Type::Float}; 1208 return {v_float_zero, Type::Float};
1203 default: 1209 default:
1204 if (IsGenericAttribute(attribute)) { 1210 if (!IsGenericAttribute(attribute)) {
1205 const u32 location = GetGenericAttributeLocation(attribute); 1211 break;
1206 const auto type_descriptor = GetAttributeType(location);
1207 const Type type = type_descriptor.type;
1208 const Id attribute_id = input_attributes.at(attribute);
1209 const std::vector elements = {element};
1210 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1211 return {OpLoad(GetTypeDefinition(type), pointer), type};
1212 } 1212 }
1213 break; 1213 const u32 location = GetGenericAttributeLocation(attribute);
1214 if (!IsAttributeEnabled(location)) {
1215 // Disabled attributes (also known as constant attributes) always return zero.
1216 return {v_float_zero, Type::Float};
1217 }
1218 const auto type_descriptor = GetAttributeType(location);
1219 const Type type = type_descriptor.type;
1220 const Id attribute_id = input_attributes.at(attribute);
1221 const std::vector elements = {element};
1222 const Id pointer = ArrayPass(type_descriptor.scalar, attribute_id, elements);
1223 return {OpLoad(GetTypeDefinition(type), pointer), type};
1214 } 1224 }
1215 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute)); 1225 UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
1216 return {v_float_zero, Type::Float}; 1226 return {v_float_zero, Type::Float};
@@ -2215,8 +2225,8 @@ private:
2215 return {}; 2225 return {};
2216 } 2226 }
2217 2227
2218 Expression MemoryBarrierGL(Operation) { 2228 template <spv::Scope scope>
2219 const auto scope = spv::Scope::Device; 2229 Expression MemoryBarrier(Operation) {
2220 const auto semantics = 2230 const auto semantics =
2221 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | 2231 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
2222 spv::MemorySemanticsMask::WorkgroupMemory | 2232 spv::MemorySemanticsMask::WorkgroupMemory |
@@ -2681,7 +2691,8 @@ private:
2681 &SPIRVDecompiler::ShuffleIndexed, 2691 &SPIRVDecompiler::ShuffleIndexed,
2682 2692
2683 &SPIRVDecompiler::Barrier, 2693 &SPIRVDecompiler::Barrier,
2684 &SPIRVDecompiler::MemoryBarrierGL, 2694 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
2695 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
2685 }; 2696 };
2686 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2697 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2687 2698
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index f4c05ac3c..b7af26388 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -88,7 +88,8 @@ struct Specialization final {
88 u32 shared_memory_size{}; 88 u32 shared_memory_size{};
89 89
90 // Graphics specific 90 // Graphics specific
91 std::optional<float> point_size{}; 91 std::optional<float> point_size;
92 std::bitset<Maxwell::NumVertexAttributes> enabled_attributes;
92 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{}; 93 std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
93 bool ndc_minus_one_to_one{}; 94 bool ndc_minus_one_to_one{};
94}; 95};
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index 694b325e1..c0a8f233f 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -83,7 +83,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
83 return Operation(OperationCode::YNegate); 83 return Operation(OperationCode::YNegate);
84 case SystemVariable::InvocationInfo: 84 case SystemVariable::InvocationInfo:
85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); 85 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
86 return Immediate(0U); 86 return Immediate(0x00ff'0000U);
87 case SystemVariable::WscaleFactorXY: 87 case SystemVariable::WscaleFactorXY:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); 88 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
89 return Immediate(0U); 89 return Immediate(0U);
@@ -299,9 +299,19 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
299 break; 299 break;
300 } 300 }
301 case OpCode::Id::MEMBAR: { 301 case OpCode::Id::MEMBAR: {
302 UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
303 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); 302 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
304 bb.push_back(Operation(OperationCode::MemoryBarrierGL)); 303 const OperationCode type = [instr] {
304 switch (instr.membar.type) {
305 case Tegra::Shader::MembarType::CTA:
306 return OperationCode::MemoryBarrierGroup;
307 case Tegra::Shader::MembarType::GL:
308 return OperationCode::MemoryBarrierGlobal;
309 default:
310 UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
311 return OperationCode::MemoryBarrierGlobal;
312 }
313 }();
314 bb.push_back(Operation(type));
305 break; 315 break;
306 } 316 }
307 case OpCode::Id::DEPBAR: { 317 case OpCode::Id::DEPBAR: {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c06512413..c5e5165ff 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -233,8 +233,9 @@ enum class OperationCode {
233 ThreadLtMask, /// () -> uint 233 ThreadLtMask, /// () -> uint
234 ShuffleIndexed, /// (uint value, uint index) -> uint 234 ShuffleIndexed, /// (uint value, uint index) -> uint
235 235
236 Barrier, /// () -> void 236 Barrier, /// () -> void
237 MemoryBarrierGL, /// () -> void 237 MemoryBarrierGroup, /// () -> void
238 MemoryBarrierGlobal, /// () -> void
238 239
239 Amount, 240 Amount,
240}; 241};
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 7032e0059..f476f03b0 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
41 ComponentType alpha_component; 41 ComponentType alpha_component;
42 bool is_srgb; 42 bool is_srgb;
43}; 43};
44constexpr std::array<Table, 77> DefinitionTable = {{ 44constexpr std::array<Table, 78> DefinitionTable = {{
45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, 45 {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, 46 {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, 47 {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{
98 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, 98 {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
99 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, 99 {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
100 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, 100 {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
101 {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
101 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, 102 {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8},
102 103
103 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, 104 {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8bfc541d4..6f63217a2 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
14#include <unordered_map> 14#include <unordered_map>
15#include <vector> 15#include <vector>
16 16
17#include <boost/container/small_vector.hpp>
17#include <boost/icl/interval_map.hpp> 18#include <boost/icl/interval_map.hpp>
18#include <boost/range/iterator_range.hpp> 19#include <boost/range/iterator_range.hpp>
19 20
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
53 54
54template <typename TSurface, typename TView> 55template <typename TSurface, typename TView>
55class TextureCache { 56class TextureCache {
57 using VectorSurface = boost::container::small_vector<TSurface, 1>;
56 58
57public: 59public:
58 void InvalidateRegion(VAddr addr, std::size_t size) { 60 void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -308,18 +310,20 @@ public:
308 dst_surface.first->MarkAsModified(true, Tick()); 310 dst_surface.first->MarkAsModified(true, Tick());
309 } 311 }
310 312
311 TSurface TryFindFramebufferSurface(VAddr addr) { 313 TSurface TryFindFramebufferSurface(VAddr addr) const {
312 if (!addr) { 314 if (!addr) {
313 return nullptr; 315 return nullptr;
314 } 316 }
315 const VAddr page = addr >> registry_page_bits; 317 const VAddr page = addr >> registry_page_bits;
316 std::vector<TSurface>& list = registry[page]; 318 const auto it = registry.find(page);
317 for (auto& surface : list) { 319 if (it == registry.end()) {
318 if (surface->GetCpuAddr() == addr) { 320 return nullptr;
319 return surface;
320 }
321 } 321 }
322 return nullptr; 322 const auto& list = it->second;
323 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
324 return surface->GetCpuAddr() == addr;
325 });
326 return found != list.end() ? *found : nullptr;
323 } 327 }
324 328
325 u64 Tick() { 329 u64 Tick() {
@@ -498,7 +502,7 @@ private:
498 * @param untopological Indicates to the recycler that the texture has no way 502 * @param untopological Indicates to the recycler that the texture has no way
499 * to match the overlaps due to topological reasons. 503 * to match the overlaps due to topological reasons.
500 **/ 504 **/
501 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, 505 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
502 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 506 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
503 if (Settings::IsGPULevelExtreme()) { 507 if (Settings::IsGPULevelExtreme()) {
504 return RecycleStrategy::Flush; 508 return RecycleStrategy::Flush;
@@ -538,9 +542,8 @@ private:
538 * @param untopological Indicates to the recycler that the texture has no way to match the 542 * @param untopological Indicates to the recycler that the texture has no way to match the
539 * overlaps due to topological reasons. 543 * overlaps due to topological reasons.
540 **/ 544 **/
541 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 545 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
542 const SurfaceParams& params, const GPUVAddr gpu_addr, 546 const GPUVAddr gpu_addr, const bool preserve_contents,
543 const bool preserve_contents,
544 const MatchTopologyResult untopological) { 547 const MatchTopologyResult untopological) {
545 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); 548 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
546 for (auto& surface : overlaps) { 549 for (auto& surface : overlaps) {
@@ -650,47 +653,65 @@ private:
650 * @param params The parameters on the new surface. 653 * @param params The parameters on the new surface.
651 * @param gpu_addr The starting address of the new surface. 654 * @param gpu_addr The starting address of the new surface.
652 **/ 655 **/
653 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, 656 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
654 const SurfaceParams& params, 657 const SurfaceParams& params,
655 const GPUVAddr gpu_addr) { 658 GPUVAddr gpu_addr) {
656 if (params.target == SurfaceTarget::Texture3D) { 659 if (params.target == SurfaceTarget::Texture3D) {
657 return {}; 660 return std::nullopt;
658 } 661 }
659 bool modified = false; 662 const auto test_modified = [](TSurface& surface) { return surface->IsModified(); };
660 TSurface new_surface = GetUncachedSurface(gpu_addr, params); 663 TSurface new_surface = GetUncachedSurface(gpu_addr, params);
661 u32 passed_tests = 0; 664
665 if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) {
666 LoadSurface(new_surface);
667 for (const auto& surface : overlaps) {
668 Unregister(surface);
669 }
670 Register(new_surface);
671 return {{new_surface, new_surface->GetMainView()}};
672 }
673
674 std::size_t passed_tests = 0;
662 for (auto& surface : overlaps) { 675 for (auto& surface : overlaps) {
663 const SurfaceParams& src_params = surface->GetSurfaceParams(); 676 const SurfaceParams& src_params = surface->GetSurfaceParams();
664 if (src_params.is_layered || src_params.num_levels > 1) { 677 const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
665 // We send this cases to recycle as they are more complex to handle
666 return {};
667 }
668 const std::size_t candidate_size = surface->GetSizeInBytes();
669 auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
670 if (!mipmap_layer) { 678 if (!mipmap_layer) {
671 continue; 679 continue;
672 } 680 }
673 const auto [layer, mipmap] = *mipmap_layer; 681 const auto [base_layer, base_mipmap] = *mipmap_layer;
674 if (new_surface->GetMipmapSize(mipmap) != candidate_size) { 682 if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) {
675 continue; 683 continue;
676 } 684 }
677 modified |= surface->IsModified(); 685 ++passed_tests;
678 // Now we got all the data set up 686
679 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); 687 // Copy all mipmaps and layers
680 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); 688 const u32 block_width = params.GetDefaultBlockWidth();
681 const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); 689 const u32 block_height = params.GetDefaultBlockHeight();
682 passed_tests++; 690 for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) {
683 ImageCopy(surface, new_surface, copy_params); 691 const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
692 const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
693 if (width < block_width || height < block_height) {
694 // Current APIs forbid copying small compressed textures, avoid errors
695 break;
696 }
697 const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height,
698 src_params.depth);
699 ImageCopy(surface, new_surface, copy_params);
700 }
684 } 701 }
685 if (passed_tests == 0) { 702 if (passed_tests == 0) {
686 return {}; 703 return std::nullopt;
704 }
705 if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
687 // In Accurate GPU all tests should pass, else we recycle 706 // In Accurate GPU all tests should pass, else we recycle
688 } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { 707 return std::nullopt;
689 return {};
690 } 708 }
709
710 const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified);
691 for (const auto& surface : overlaps) { 711 for (const auto& surface : overlaps) {
692 Unregister(surface); 712 Unregister(surface);
693 } 713 }
714
694 new_surface->MarkAsModified(modified, Tick()); 715 new_surface->MarkAsModified(modified, Tick());
695 Register(new_surface); 716 Register(new_surface);
696 return {{new_surface, new_surface->GetMainView()}}; 717 return {{new_surface, new_surface->GetMainView()}};
@@ -708,7 +729,7 @@ private:
708 * @param preserve_contents Indicates that the new surface should be loaded from memory or 729 * @param preserve_contents Indicates that the new surface should be loaded from memory or
709 * left blank. 730 * left blank.
710 */ 731 */
711 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 732 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
712 const SurfaceParams& params, 733 const SurfaceParams& params,
713 const GPUVAddr gpu_addr, 734 const GPUVAddr gpu_addr,
714 const VAddr cpu_addr, 735 const VAddr cpu_addr,
@@ -810,7 +831,7 @@ private:
810 TSurface& current_surface = iter->second; 831 TSurface& current_surface = iter->second;
811 const auto topological_result = current_surface->MatchesTopology(params); 832 const auto topological_result = current_surface->MatchesTopology(params);
812 if (topological_result != MatchTopologyResult::FullMatch) { 833 if (topological_result != MatchTopologyResult::FullMatch) {
813 std::vector<TSurface> overlaps{current_surface}; 834 VectorSurface overlaps{current_surface};
814 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 835 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
815 topological_result); 836 topological_result);
816 } 837 }
@@ -868,12 +889,9 @@ private:
868 // two things either the candidate surface is a supertexture of the overlap 889 // two things either the candidate surface is a supertexture of the overlap
869 // or they don't match in any known way. 890 // or they don't match in any known way.
870 if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { 891 if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
871 if (current_surface->GetGpuAddr() == gpu_addr) { 892 const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr);
872 std::optional<std::pair<TSurface, TView>> view = 893 if (view) {
873 TryReconstructSurface(overlaps, params, gpu_addr); 894 return *view;
874 if (view) {
875 return *view;
876 }
877 } 895 }
878 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 896 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
879 MatchTopologyResult::FullMatch); 897 MatchTopologyResult::FullMatch);
@@ -1126,23 +1144,25 @@ private:
1126 } 1144 }
1127 } 1145 }
1128 1146
1129 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { 1147 VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1130 if (size == 0) { 1148 if (size == 0) {
1131 return {}; 1149 return {};
1132 } 1150 }
1133 const VAddr cpu_addr_end = cpu_addr + size; 1151 const VAddr cpu_addr_end = cpu_addr + size;
1134 VAddr start = cpu_addr >> registry_page_bits;
1135 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 1152 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1136 std::vector<TSurface> surfaces; 1153 VectorSurface surfaces;
1137 while (start <= end) { 1154 for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1138 std::vector<TSurface>& list = registry[start]; 1155 const auto it = registry.find(start);
1139 for (auto& surface : list) { 1156 if (it == registry.end()) {
1140 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { 1157 continue;
1141 surface->MarkAsPicked(true); 1158 }
1142 surfaces.push_back(surface); 1159 for (auto& surface : it->second) {
1160 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
1161 continue;
1143 } 1162 }
1163 surface->MarkAsPicked(true);
1164 surfaces.push_back(surface);
1144 } 1165 }
1145 start++;
1146 } 1166 }
1147 for (auto& surface : surfaces) { 1167 for (auto& surface : surfaces) {
1148 surface->MarkAsPicked(false); 1168 surface->MarkAsPicked(false);
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 1adf8932b..1f5e43043 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -106,6 +106,9 @@ public:
106 format.setVersion(4, 3); 106 format.setVersion(4, 3);
107 format.setProfile(QSurfaceFormat::CompatibilityProfile); 107 format.setProfile(QSurfaceFormat::CompatibilityProfile);
108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); 108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
109 if (Settings::values.renderer_debug) {
110 format.setOption(QSurfaceFormat::FormatOption::DebugContext);
111 }
109 // TODO: expose a setting for buffer value (ie default/single/double/triple) 112 // TODO: expose a setting for buffer value (ie default/single/double/triple)
110 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); 113 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
111 format.setSwapInterval(0); 114 format.setSwapInterval(0);
diff --git a/src/yuzu/configuration/configure_input_player.cpp b/src/yuzu/configuration/configure_input_player.cpp
index e4eb5594b..a05fa64ba 100644
--- a/src/yuzu/configuration/configure_input_player.cpp
+++ b/src/yuzu/configuration/configure_input_player.cpp
@@ -480,7 +480,9 @@ void ConfigureInputPlayer::RestoreDefaults() {
480 SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]); 480 SetAnalogButton(params, analogs_param[analog_id], analog_sub_buttons[sub_button_id]);
481 } 481 }
482 } 482 }
483
483 UpdateButtonLabels(); 484 UpdateButtonLabels();
485 ApplyConfiguration();
484} 486}
485 487
486void ConfigureInputPlayer::ClearAll() { 488void ConfigureInputPlayer::ClearAll() {
@@ -505,6 +507,7 @@ void ConfigureInputPlayer::ClearAll() {
505 } 507 }
506 508
507 UpdateButtonLabels(); 509 UpdateButtonLabels();
510 ApplyConfiguration();
508} 511}
509 512
510void ConfigureInputPlayer::UpdateButtonLabels() { 513void ConfigureInputPlayer::UpdateButtonLabels() {
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 411e7e647..09cc0a3b5 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); 98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); 99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); 100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
101 if (Settings::values.renderer_debug) {
102 SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
103 }
101 SDL_GL_SetSwapInterval(0); 104 SDL_GL_SetSwapInterval(0);
102 105
103 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, 106 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,