summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitmodules3
-rw-r--r--CMakeLists.txt4
-rw-r--r--externals/CMakeLists.txt8
m---------externals/sirit0
m---------externals/xbyak0
-rw-r--r--src/audio_core/audio_renderer.cpp99
-rw-r--r--src/audio_core/audio_renderer.h10
-rw-r--r--src/audio_core/common.h1
-rw-r--r--src/common/CMakeLists.txt8
-rw-r--r--src/common/memory_detect.cpp60
-rw-r--r--src/common/memory_detect.h22
-rw-r--r--src/common/time_zone.cpp49
-rw-r--r--src/common/time_zone.h18
-rw-r--r--src/common/x64/xbyak_abi.h266
-rw-r--r--src/common/x64/xbyak_util.h47
-rw-r--r--src/core/file_sys/control_metadata.cpp4
-rw-r--r--src/core/file_sys/control_metadata.h1
-rw-r--r--src/core/file_sys/patch_manager.cpp34
-rw-r--r--src/core/file_sys/patch_manager.h5
-rw-r--r--src/core/file_sys/savedata_factory.cpp9
-rw-r--r--src/core/file_sys/system_archive/system_version.cpp14
-rw-r--r--src/core/frontend/emu_window.cpp2
-rw-r--r--src/core/frontend/framebuffer_layout.h5
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp38
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.h1
-rw-r--r--src/core/hle/service/hid/controllers/keyboard.cpp7
-rw-r--r--src/core/hle/service/nifm/nifm.cpp3
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp4
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h10
-rw-r--r--src/core/hle/service/time/time_manager.cpp11
-rw-r--r--src/core/hle/service/time/time_zone_content_manager.cpp24
-rw-r--r--src/core/settings.cpp17
-rw-r--r--src/core/settings.h5
-rw-r--r--src/core/telemetry_session.cpp1
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h227
-rw-r--r--src/video_core/buffer_cache/map_interval.cpp33
-rw-r--r--src/video_core/buffer_cache/map_interval.h133
-rw-r--r--src/video_core/dma_pusher.cpp9
-rw-r--r--src/video_core/dma_pusher.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp13
-rw-r--r--src/video_core/engines/shader_bytecode.h28
-rw-r--r--src/video_core/rasterizer_cache.h58
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp11
-rw-r--r--src/video_core/renderer_opengl/gl_device.h10
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp125
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h16
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp101
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp153
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp110
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h56
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp71
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h26
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp19
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp8
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp81
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp29
-rw-r--r--src/video_core/shader/decode.cpp2
-rw-r--r--src/video_core/shader/decode/memory.cpp3
-rw-r--r--src/video_core/shader/decode/other.cpp40
-rw-r--r--src/video_core/shader/decode/xmad.cpp12
-rw-r--r--src/video_core/shader/node.h30
-rw-r--r--src/video_core/shader/shader_ir.cpp109
-rw-r--r--src/video_core/texture_cache/texture_cache.h57
-rw-r--r--src/yuzu/bootmanager.cpp19
-rw-r--r--src/yuzu/configuration/config.cpp7
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp6
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui10
-rw-r--r--src/yuzu/configuration/configure_system.cpp2
-rw-r--r--src/yuzu/configuration/configure_system.h1
-rw-r--r--src/yuzu/configuration/configure_system.ui257
-rw-r--r--src/yuzu/discord_impl.cpp2
-rw-r--r--src/yuzu/game_list.cpp8
-rw-r--r--src/yuzu/game_list.h2
-rw-r--r--src/yuzu/loading_screen.cpp3
-rw-r--r--src/yuzu/main.cpp90
-rw-r--r--src/yuzu/main.h3
-rw-r--r--src/yuzu/main.ui12
-rw-r--r--src/yuzu_cmd/config.cpp7
-rw-r--r--src/yuzu_cmd/default_ini.h8
-rw-r--r--src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp3
92 files changed, 2192 insertions, 669 deletions
diff --git a/.gitmodules b/.gitmodules
index bf3b80d59..2ec9dda62 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -28,3 +28,6 @@
28[submodule "libzip"] 28[submodule "libzip"]
29 path = externals/libzip/libzip 29 path = externals/libzip/libzip
30 url = https://github.com/nih-at/libzip.git 30 url = https://github.com/nih-at/libzip.git
31[submodule "xbyak"]
32 path = externals/xbyak
33 url = https://github.com/herumi/xbyak.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 61321bf0a..a9f669d56 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
1cmake_minimum_required(VERSION 3.11) 1cmake_minimum_required(VERSION 3.15)
2 2
3list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules") 3list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
4list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules") 4list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/externals/cmake-modules")
@@ -13,7 +13,7 @@ project(yuzu)
13option(ENABLE_SDL2 "Enable the SDL2 frontend" ON) 13option(ENABLE_SDL2 "Enable the SDL2 frontend" ON)
14 14
15option(ENABLE_QT "Enable the Qt frontend" ON) 15option(ENABLE_QT "Enable the Qt frontend" ON)
16CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" OFF "ENABLE_QT;MSVC" OFF) 16CMAKE_DEPENDENT_OPTION(YUZU_USE_BUNDLED_QT "Download bundled Qt binaries" ON "ENABLE_QT;MSVC" OFF)
17 17
18option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON) 18option(ENABLE_WEB_SERVICE "Enable web services (telemetry, etc.)" ON)
19 19
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 0b40cd1b0..df7a5e0a9 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -75,3 +75,11 @@ if (ENABLE_WEB_SERVICE)
75 target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT) 75 target_compile_definitions(httplib INTERFACE -DCPPHTTPLIB_OPENSSL_SUPPORT)
76 target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto) 76 target_link_libraries(httplib INTERFACE OpenSSL::SSL OpenSSL::Crypto)
77endif() 77endif()
78
79if (NOT TARGET xbyak)
80 if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64)
81 add_library(xbyak INTERFACE)
82 target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
83 target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
84 endif()
85endif()
diff --git a/externals/sirit b/externals/sirit
Subproject 414fc4dbd28d8fe48f735a0c389db8a234f733c Subproject a62c5bbc100a5e5a31ea0ccc4a78d8fa6a4167c
diff --git a/externals/xbyak b/externals/xbyak
new file mode 160000
Subproject 82b70e665918efc2ee348091742fd0237b3b68c
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index d18ef6940..50846a854 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -17,7 +17,7 @@ namespace AudioCore {
17 17
18constexpr u32 STREAM_SAMPLE_RATE{48000}; 18constexpr u32 STREAM_SAMPLE_RATE{48000};
19constexpr u32 STREAM_NUM_CHANNELS{2}; 19constexpr u32 STREAM_NUM_CHANNELS{2};
20 20using VoiceChannelHolder = std::array<VoiceResourceInformation*, 6>;
21class AudioRenderer::VoiceState { 21class AudioRenderer::VoiceState {
22public: 22public:
23 bool IsPlaying() const { 23 bool IsPlaying() const {
@@ -37,9 +37,10 @@ public:
37 } 37 }
38 38
39 void SetWaveIndex(std::size_t index); 39 void SetWaveIndex(std::size_t index);
40 std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory); 40 std::vector<s16> DequeueSamples(std::size_t sample_count, Core::Memory::Memory& memory,
41 const VoiceChannelHolder& voice_resources);
41 void UpdateState(); 42 void UpdateState();
42 void RefreshBuffer(Core::Memory::Memory& memory); 43 void RefreshBuffer(Core::Memory::Memory& memory, const VoiceChannelHolder& voice_resources);
43 44
44private: 45private:
45 bool is_in_use{}; 46 bool is_in_use{};
@@ -79,7 +80,7 @@ AudioRenderer::AudioRenderer(Core::Timing::CoreTiming& core_timing, Core::Memory
79 std::shared_ptr<Kernel::WritableEvent> buffer_event, 80 std::shared_ptr<Kernel::WritableEvent> buffer_event,
80 std::size_t instance_number) 81 std::size_t instance_number)
81 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count), 82 : worker_params{params}, buffer_event{buffer_event}, voices(params.voice_count),
82 effects(params.effect_count), memory{memory_} { 83 voice_resources(params.voice_count), effects(params.effect_count), memory{memory_} {
83 behavior_info.SetUserRevision(params.revision); 84 behavior_info.SetUserRevision(params.revision);
84 audio_out = std::make_unique<AudioCore::AudioOut>(); 85 audio_out = std::make_unique<AudioCore::AudioOut>();
85 stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS, 86 stream = audio_out->OpenStream(core_timing, STREAM_SAMPLE_RATE, STREAM_NUM_CHANNELS,
@@ -127,6 +128,12 @@ ResultVal<std::vector<u8>> AudioRenderer::UpdateAudioRenderer(const std::vector<
127 input_params.data() + sizeof(UpdateDataHeader) + config.behavior_size, 128 input_params.data() + sizeof(UpdateDataHeader) + config.behavior_size,
128 memory_pool_count * sizeof(MemoryPoolInfo)); 129 memory_pool_count * sizeof(MemoryPoolInfo));
129 130
131 // Copy voice resources
132 const std::size_t voice_resource_offset{sizeof(UpdateDataHeader) + config.behavior_size +
133 config.memory_pools_size};
134 std::memcpy(voice_resources.data(), input_params.data() + voice_resource_offset,
135 sizeof(VoiceResourceInformation) * voice_resources.size());
136
130 // Copy VoiceInfo structs 137 // Copy VoiceInfo structs
131 std::size_t voice_offset{sizeof(UpdateDataHeader) + config.behavior_size + 138 std::size_t voice_offset{sizeof(UpdateDataHeader) + config.behavior_size +
132 config.memory_pools_size + config.voice_resource_size}; 139 config.memory_pools_size + config.voice_resource_size};
@@ -220,14 +227,15 @@ void AudioRenderer::VoiceState::SetWaveIndex(std::size_t index) {
220 is_refresh_pending = true; 227 is_refresh_pending = true;
221} 228}
222 229
223std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(std::size_t sample_count, 230std::vector<s16> AudioRenderer::VoiceState::DequeueSamples(
224 Core::Memory::Memory& memory) { 231 std::size_t sample_count, Core::Memory::Memory& memory,
232 const VoiceChannelHolder& voice_resources) {
225 if (!IsPlaying()) { 233 if (!IsPlaying()) {
226 return {}; 234 return {};
227 } 235 }
228 236
229 if (is_refresh_pending) { 237 if (is_refresh_pending) {
230 RefreshBuffer(memory); 238 RefreshBuffer(memory, voice_resources);
231 } 239 }
232 240
233 const std::size_t max_size{samples.size() - offset}; 241 const std::size_t max_size{samples.size() - offset};
@@ -271,7 +279,8 @@ void AudioRenderer::VoiceState::UpdateState() {
271 is_in_use = info.is_in_use; 279 is_in_use = info.is_in_use;
272} 280}
273 281
274void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) { 282void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory,
283 const VoiceChannelHolder& voice_resources) {
275 const auto wave_buffer_address = info.wave_buffer[wave_index].buffer_addr; 284 const auto wave_buffer_address = info.wave_buffer[wave_index].buffer_addr;
276 const auto wave_buffer_size = info.wave_buffer[wave_index].buffer_sz; 285 const auto wave_buffer_size = info.wave_buffer[wave_index].buffer_sz;
277 std::vector<s16> new_samples(wave_buffer_size / sizeof(s16)); 286 std::vector<s16> new_samples(wave_buffer_size / sizeof(s16));
@@ -296,17 +305,77 @@ void AudioRenderer::VoiceState::RefreshBuffer(Core::Memory::Memory& memory) {
296 } 305 }
297 306
298 switch (info.channel_count) { 307 switch (info.channel_count) {
299 case 1: 308 case 1: {
300 // 1 channel is upsampled to 2 channel 309 // 1 channel is upsampled to 2 channel
301 samples.resize(new_samples.size() * 2); 310 samples.resize(new_samples.size() * 2);
311
302 for (std::size_t index = 0; index < new_samples.size(); ++index) { 312 for (std::size_t index = 0; index < new_samples.size(); ++index) {
303 samples[index * 2] = new_samples[index]; 313 auto sample = static_cast<float>(new_samples[index]);
304 samples[index * 2 + 1] = new_samples[index]; 314 if (voice_resources[0]->in_use) {
315 sample *= voice_resources[0]->mix_volumes[0];
316 }
317
318 samples[index * 2] = static_cast<s16>(sample * info.volume);
319 samples[index * 2 + 1] = static_cast<s16>(sample * info.volume);
305 } 320 }
306 break; 321 break;
322 }
307 case 2: { 323 case 2: {
308 // 2 channel is played as is 324 // 2 channel is played as is
309 samples = std::move(new_samples); 325 samples = std::move(new_samples);
326 const std::size_t sample_count = (samples.size() / 2);
327 for (std::size_t index = 0; index < sample_count; ++index) {
328 const std::size_t index_l = index * 2;
329 const std::size_t index_r = index * 2 + 1;
330
331 auto sample_l = static_cast<float>(samples[index_l]);
332 auto sample_r = static_cast<float>(samples[index_r]);
333
334 if (voice_resources[0]->in_use) {
335 sample_l *= voice_resources[0]->mix_volumes[0];
336 }
337
338 if (voice_resources[1]->in_use) {
339 sample_r *= voice_resources[1]->mix_volumes[1];
340 }
341
342 samples[index_l] = static_cast<s16>(sample_l * info.volume);
343 samples[index_r] = static_cast<s16>(sample_r * info.volume);
344 }
345 break;
346 }
347 case 6: {
348 samples.resize((new_samples.size() / 6) * 2);
349 const std::size_t sample_count = samples.size() / 2;
350
351 for (std::size_t index = 0; index < sample_count; ++index) {
352 auto FL = static_cast<float>(new_samples[index * 6]);
353 auto FR = static_cast<float>(new_samples[index * 6 + 1]);
354 auto FC = static_cast<float>(new_samples[index * 6 + 2]);
355 auto BL = static_cast<float>(new_samples[index * 6 + 4]);
356 auto BR = static_cast<float>(new_samples[index * 6 + 5]);
357
358 if (voice_resources[0]->in_use) {
359 FL *= voice_resources[0]->mix_volumes[0];
360 }
361 if (voice_resources[1]->in_use) {
362 FR *= voice_resources[1]->mix_volumes[1];
363 }
364 if (voice_resources[2]->in_use) {
365 FC *= voice_resources[2]->mix_volumes[2];
366 }
367 if (voice_resources[4]->in_use) {
368 BL *= voice_resources[4]->mix_volumes[4];
369 }
370 if (voice_resources[5]->in_use) {
371 BR *= voice_resources[5]->mix_volumes[5];
372 }
373
374 samples[index * 2] =
375 static_cast<s16>((0.3694f * FL + 0.2612f * FC + 0.3694f * BL) * info.volume);
376 samples[index * 2 + 1] =
377 static_cast<s16>((0.3694f * FR + 0.2612f * FC + 0.3694f * BR) * info.volume);
378 }
310 break; 379 break;
311 } 380 }
312 default: 381 default:
@@ -352,11 +421,17 @@ void AudioRenderer::QueueMixedBuffer(Buffer::Tag tag) {
352 if (!voice.IsPlaying()) { 421 if (!voice.IsPlaying()) {
353 continue; 422 continue;
354 } 423 }
424 VoiceChannelHolder resources{};
425 for (u32 channel = 0; channel < voice.GetInfo().channel_count; channel++) {
426 const auto channel_resource_id = voice.GetInfo().voice_channel_resource_ids[channel];
427 resources[channel] = &voice_resources[channel_resource_id];
428 }
355 429
356 std::size_t offset{}; 430 std::size_t offset{};
357 s64 samples_remaining{BUFFER_SIZE}; 431 s64 samples_remaining{BUFFER_SIZE};
358 while (samples_remaining > 0) { 432 while (samples_remaining > 0) {
359 const std::vector<s16> samples{voice.DequeueSamples(samples_remaining, memory)}; 433 const std::vector<s16> samples{
434 voice.DequeueSamples(samples_remaining, memory, resources)};
360 435
361 if (samples.empty()) { 436 if (samples.empty()) {
362 break; 437 break;
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index b42770fae..1f9114c07 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -9,6 +9,7 @@
9#include <vector> 9#include <vector>
10 10
11#include "audio_core/behavior_info.h" 11#include "audio_core/behavior_info.h"
12#include "audio_core/common.h"
12#include "audio_core/stream.h" 13#include "audio_core/stream.h"
13#include "common/common_funcs.h" 14#include "common/common_funcs.h"
14#include "common/common_types.h" 15#include "common/common_types.h"
@@ -116,6 +117,14 @@ struct WaveBuffer {
116}; 117};
117static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size"); 118static_assert(sizeof(WaveBuffer) == 0x38, "WaveBuffer has wrong size");
118 119
120struct VoiceResourceInformation {
121 s32_le id{};
122 std::array<float_le, MAX_MIX_BUFFERS> mix_volumes{};
123 bool in_use{};
124 INSERT_PADDING_BYTES(11);
125};
126static_assert(sizeof(VoiceResourceInformation) == 0x70, "VoiceResourceInformation has wrong size");
127
119struct VoiceInfo { 128struct VoiceInfo {
120 u32_le id; 129 u32_le id;
121 u32_le node_id; 130 u32_le node_id;
@@ -244,6 +253,7 @@ private:
244 AudioRendererParameter worker_params; 253 AudioRendererParameter worker_params;
245 std::shared_ptr<Kernel::WritableEvent> buffer_event; 254 std::shared_ptr<Kernel::WritableEvent> buffer_event;
246 std::vector<VoiceState> voices; 255 std::vector<VoiceState> voices;
256 std::vector<VoiceResourceInformation> voice_resources;
247 std::vector<EffectState> effects; 257 std::vector<EffectState> effects;
248 std::unique_ptr<AudioOut> audio_out; 258 std::unique_ptr<AudioOut> audio_out;
249 StreamPtr stream; 259 StreamPtr stream;
diff --git a/src/audio_core/common.h b/src/audio_core/common.h
index 98478b66b..7bb145c53 100644
--- a/src/audio_core/common.h
+++ b/src/audio_core/common.h
@@ -14,6 +14,7 @@ constexpr ResultCode ERR_INVALID_PARAMETERS{ErrorModule::Audio, 41};
14} 14}
15 15
16constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8'); 16constexpr u32_le CURRENT_PROCESS_REVISION = Common::MakeMagic('R', 'E', 'V', '8');
17constexpr std::size_t MAX_MIX_BUFFERS = 24;
17 18
18static constexpr u32 VersionFromRevision(u32_le rev) { 19static constexpr u32 VersionFromRevision(u32_le rev) {
19 // "REV7" -> 7 20 // "REV7" -> 7
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index d1ec8ff08..24b7a083c 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -123,6 +123,8 @@ add_library(common STATIC
123 lz4_compression.cpp 123 lz4_compression.cpp
124 lz4_compression.h 124 lz4_compression.h
125 math_util.h 125 math_util.h
126 memory_detect.cpp
127 memory_detect.h
126 memory_hook.cpp 128 memory_hook.cpp
127 memory_hook.h 129 memory_hook.h
128 microprofile.cpp 130 microprofile.cpp
@@ -148,6 +150,8 @@ add_library(common STATIC
148 thread.h 150 thread.h
149 thread_queue_list.h 151 thread_queue_list.h
150 threadsafe_queue.h 152 threadsafe_queue.h
153 time_zone.cpp
154 time_zone.h
151 timer.cpp 155 timer.cpp
152 timer.h 156 timer.h
153 uint128.cpp 157 uint128.cpp
@@ -167,10 +171,12 @@ if(ARCHITECTURE_x86_64)
167 PRIVATE 171 PRIVATE
168 x64/cpu_detect.cpp 172 x64/cpu_detect.cpp
169 x64/cpu_detect.h 173 x64/cpu_detect.h
174 x64/xbyak_abi.h
175 x64/xbyak_util.h
170 ) 176 )
171endif() 177endif()
172 178
173create_target_directory_groups(common) 179create_target_directory_groups(common)
174 180
175target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile) 181target_link_libraries(common PUBLIC Boost::boost fmt::fmt microprofile)
176target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd) 182target_link_libraries(common PRIVATE lz4::lz4 zstd::zstd xbyak)
diff --git a/src/common/memory_detect.cpp b/src/common/memory_detect.cpp
new file mode 100644
index 000000000..3fdc309a2
--- /dev/null
+++ b/src/common/memory_detect.cpp
@@ -0,0 +1,60 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#ifdef _WIN32
6// clang-format off
7#include <windows.h>
8#include <sysinfoapi.h>
9// clang-format on
10#else
11#include <sys/types.h>
12#ifdef __APPLE__
13#include <sys/sysctl.h>
14#else
15#include <sys/sysinfo.h>
16#endif
17#endif
18
19#include "common/memory_detect.h"
20
21namespace Common {
22
23// Detects the RAM and Swapfile sizes
24static MemoryInfo Detect() {
25 MemoryInfo mem_info{};
26
27#ifdef _WIN32
28 MEMORYSTATUSEX memorystatus;
29 memorystatus.dwLength = sizeof(memorystatus);
30 GlobalMemoryStatusEx(&memorystatus);
31 mem_info.TotalPhysicalMemory = memorystatus.ullTotalPhys;
32 mem_info.TotalSwapMemory = memorystatus.ullTotalPageFile - mem_info.TotalPhysicalMemory;
33#elif defined(__APPLE__)
34 u64 ramsize;
35 struct xsw_usage vmusage;
36 std::size_t sizeof_ramsize = sizeof(ramsize);
37 std::size_t sizeof_vmusage = sizeof(vmusage);
38 // hw and vm are defined in sysctl.h
39 // https://github.com/apple/darwin-xnu/blob/master/bsd/sys/sysctl.h#L471
40 // sysctlbyname(const char *, void *, size_t *, void *, size_t);
41 sysctlbyname("hw.memsize", &ramsize, &sizeof_ramsize, NULL, 0);
42 sysctlbyname("vm.swapusage", &vmusage, &sizeof_vmusage, NULL, 0);
43 mem_info.TotalPhysicalMemory = ramsize;
44 mem_info.TotalSwapMemory = vmusage.xsu_total;
45#else
46 struct sysinfo meminfo;
47 sysinfo(&meminfo);
48 mem_info.TotalPhysicalMemory = meminfo.totalram;
49 mem_info.TotalSwapMemory = meminfo.totalswap;
50#endif
51
52 return mem_info;
53}
54
55const MemoryInfo& GetMemInfo() {
56 static MemoryInfo mem_info = Detect();
57 return mem_info;
58}
59
60} // namespace Common \ No newline at end of file
diff --git a/src/common/memory_detect.h b/src/common/memory_detect.h
new file mode 100644
index 000000000..a73c0f3f4
--- /dev/null
+++ b/src/common/memory_detect.h
@@ -0,0 +1,22 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Common {
10
/// Memory totals of the host system; both fields default to zero.
struct MemoryInfo {
    /// Total physical memory
    u64 TotalPhysicalMemory = 0;
    /// Total swap memory
    u64 TotalSwapMemory = 0;
};
15
16/**
17 * Gets the memory info of the host system
18 * @return Reference to a MemoryInfo struct with the physical and swap memory sizes in bytes
19 */
20const MemoryInfo& GetMemInfo();
21
22} // namespace Common \ No newline at end of file
diff --git a/src/common/time_zone.cpp b/src/common/time_zone.cpp
new file mode 100644
index 000000000..ce239eb63
--- /dev/null
+++ b/src/common/time_zone.cpp
@@ -0,0 +1,49 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <chrono>
6#include <iomanip>
7#include <sstream>
8
9#include "common/logging/log.h"
10#include "common/time_zone.h"
11
12namespace Common::TimeZone {
13
// Name of the time zone used as the default.
std::string GetDefaultTimeZone() {
    return std::string{"GMT"};
}
17
// Formats the host's current UTC offset with the "%z" conversion, e.g. "-0400".
// Returns an empty string when the local time cannot be determined.
static std::string GetOsTimeZoneOffset() {
    const std::time_t t{std::time(nullptr)};

    // std::localtime may return a null pointer on failure; never dereference it blindly.
    const std::tm* const local_time{std::localtime(&t)};
    if (local_time == nullptr) {
        return {};
    }
    // Copy out of the shared buffer std::localtime returns before formatting.
    const std::tm tm{*local_time};

    std::stringstream ss;
    ss << std::put_time(&tm, "%z"); // Get the current timezone offset, e.g. "-0400", as a string

    return ss.str();
}
27
28static int ConvertOsTimeZoneOffsetToInt(const std::string& timezone) {
29 try {
30 return std::stoi(timezone);
31 } catch (const std::invalid_argument&) {
32 LOG_CRITICAL(Common, "invalid_argument with {}!", timezone);
33 return 0;
34 } catch (const std::out_of_range&) {
35 LOG_CRITICAL(Common, "out_of_range with {}!", timezone);
36 return 0;
37 }
38}
39
40std::chrono::seconds GetCurrentOffsetSeconds() {
41 const int offset{ConvertOsTimeZoneOffsetToInt(GetOsTimeZoneOffset())};
42
43 int seconds{(offset / 100) * 60 * 60}; // Convert hour component to seconds
44 seconds += (offset % 100) * 60; // Convert minute component to seconds
45
46 return std::chrono::seconds{seconds};
47}
48
49} // namespace Common::TimeZone
diff --git a/src/common/time_zone.h b/src/common/time_zone.h
new file mode 100644
index 000000000..945daa09c
--- /dev/null
+++ b/src/common/time_zone.h
@@ -0,0 +1,18 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <chrono>
8#include <string>
9
10namespace Common::TimeZone {
11
12/// Gets the default timezone, i.e. "GMT"
13std::string GetDefaultTimeZone();
14
15/// Gets the offset of the current timezone (from the default), in seconds
16std::chrono::seconds GetCurrentOffsetSeconds();
17
18} // namespace Common::TimeZone
diff --git a/src/common/x64/xbyak_abi.h b/src/common/x64/xbyak_abi.h
new file mode 100644
index 000000000..794da8a52
--- /dev/null
+++ b/src/common/x64/xbyak_abi.h
@@ -0,0 +1,266 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <bitset>
8#include <initializer_list>
9#include <xbyak.h>
10#include "common/assert.h"
11
12namespace Common::X64 {
13
14inline int RegToIndex(const Xbyak::Reg& reg) {
15 using Kind = Xbyak::Reg::Kind;
16 ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
17 "RegSet only support GPRs and XMM registers.");
18 ASSERT_MSG(reg.getIdx() < 16, "RegSet only supports XXM0-15.");
19 return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
20}
21
22inline Xbyak::Reg64 IndexToReg64(int reg_index) {
23 ASSERT(reg_index < 16);
24 return Xbyak::Reg64(reg_index);
25}
26
27inline Xbyak::Xmm IndexToXmm(int reg_index) {
28 ASSERT(reg_index >= 16 && reg_index < 32);
29 return Xbyak::Xmm(reg_index - 16);
30}
31
32inline Xbyak::Reg IndexToReg(int reg_index) {
33 if (reg_index < 16) {
34 return IndexToReg64(reg_index);
35 } else {
36 return IndexToXmm(reg_index);
37 }
38}
39
40inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
41 std::bitset<32> bits;
42 for (const Xbyak::Reg& reg : regs) {
43 bits[RegToIndex(reg)] = true;
44 }
45 return bits;
46}
47
48const std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
49const std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
50
51#ifdef _WIN32
52
53// Microsoft x64 ABI
54const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
55const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
56const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
57const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
58const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
59
60const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
61 // GPRs
62 Xbyak::util::rcx,
63 Xbyak::util::rdx,
64 Xbyak::util::r8,
65 Xbyak::util::r9,
66 Xbyak::util::r10,
67 Xbyak::util::r11,
68 // XMMs
69 Xbyak::util::xmm0,
70 Xbyak::util::xmm1,
71 Xbyak::util::xmm2,
72 Xbyak::util::xmm3,
73 Xbyak::util::xmm4,
74 Xbyak::util::xmm5,
75});
76
77const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
78 // GPRs
79 Xbyak::util::rbx,
80 Xbyak::util::rsi,
81 Xbyak::util::rdi,
82 Xbyak::util::rbp,
83 Xbyak::util::r12,
84 Xbyak::util::r13,
85 Xbyak::util::r14,
86 Xbyak::util::r15,
87 // XMMs
88 Xbyak::util::xmm6,
89 Xbyak::util::xmm7,
90 Xbyak::util::xmm8,
91 Xbyak::util::xmm9,
92 Xbyak::util::xmm10,
93 Xbyak::util::xmm11,
94 Xbyak::util::xmm12,
95 Xbyak::util::xmm13,
96 Xbyak::util::xmm14,
97 Xbyak::util::xmm15,
98});
99
100constexpr size_t ABI_SHADOW_SPACE = 0x20;
101
102#else
103
104// System V x86-64 ABI
105const Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
106const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
107const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
108const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
109const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
110
111const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
112 // GPRs
113 Xbyak::util::rcx,
114 Xbyak::util::rdx,
115 Xbyak::util::rdi,
116 Xbyak::util::rsi,
117 Xbyak::util::r8,
118 Xbyak::util::r9,
119 Xbyak::util::r10,
120 Xbyak::util::r11,
121 // XMMs
122 Xbyak::util::xmm0,
123 Xbyak::util::xmm1,
124 Xbyak::util::xmm2,
125 Xbyak::util::xmm3,
126 Xbyak::util::xmm4,
127 Xbyak::util::xmm5,
128 Xbyak::util::xmm6,
129 Xbyak::util::xmm7,
130 Xbyak::util::xmm8,
131 Xbyak::util::xmm9,
132 Xbyak::util::xmm10,
133 Xbyak::util::xmm11,
134 Xbyak::util::xmm12,
135 Xbyak::util::xmm13,
136 Xbyak::util::xmm14,
137 Xbyak::util::xmm15,
138});
139
140const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
141 // GPRs
142 Xbyak::util::rbx,
143 Xbyak::util::rbp,
144 Xbyak::util::r12,
145 Xbyak::util::r13,
146 Xbyak::util::r14,
147 Xbyak::util::r15,
148});
149
150constexpr size_t ABI_SHADOW_SPACE = 0;
151
152#endif
153
154inline void ABI_CalculateFrameSize(std::bitset<32> regs, size_t rsp_alignment,
155 size_t needed_frame_size, s32* out_subtraction,
156 s32* out_xmm_offset) {
157 const auto count = (regs & ABI_ALL_GPRS).count();
158 rsp_alignment -= count * 8;
159 size_t subtraction = 0;
160 const auto xmm_count = (regs & ABI_ALL_XMMS).count();
161 if (xmm_count) {
162 // If we have any XMMs to save, we must align the stack here.
163 subtraction = rsp_alignment & 0xF;
164 }
165 subtraction += 0x10 * xmm_count;
166 size_t xmm_base_subtraction = subtraction;
167 subtraction += needed_frame_size;
168 subtraction += ABI_SHADOW_SPACE;
169 // Final alignment.
170 rsp_alignment -= subtraction;
171 subtraction += rsp_alignment & 0xF;
172
173 *out_subtraction = (s32)subtraction;
174 *out_xmm_offset = (s32)(subtraction - xmm_base_subtraction);
175}
176
177inline size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
178 size_t rsp_alignment, size_t needed_frame_size = 0) {
179 s32 subtraction, xmm_offset;
180 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
181 for (std::size_t i = 0; i < regs.size(); ++i) {
182 if (regs[i] && ABI_ALL_GPRS[i]) {
183 code.push(IndexToReg64(static_cast<int>(i)));
184 }
185 }
186 if (subtraction != 0) {
187 code.sub(code.rsp, subtraction);
188 }
189
190 for (int i = 0; i < regs.count(); i++) {
191 if (regs.test(i) & ABI_ALL_GPRS.test(i)) {
192 code.push(IndexToReg64(i));
193 }
194 }
195
196 for (std::size_t i = 0; i < regs.size(); ++i) {
197 if (regs[i] && ABI_ALL_XMMS[i]) {
198 code.movaps(code.xword[code.rsp + xmm_offset], IndexToXmm(static_cast<int>(i)));
199 xmm_offset += 0x10;
200 }
201 }
202
203 return ABI_SHADOW_SPACE;
204}
205
206inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
207 size_t rsp_alignment, size_t needed_frame_size = 0) {
208 s32 subtraction, xmm_offset;
209 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
210
211 for (std::size_t i = 0; i < regs.size(); ++i) {
212 if (regs[i] && ABI_ALL_XMMS[i]) {
213 code.movaps(IndexToXmm(static_cast<int>(i)), code.xword[code.rsp + xmm_offset]);
214 xmm_offset += 0x10;
215 }
216 }
217
218 if (subtraction != 0) {
219 code.add(code.rsp, subtraction);
220 }
221
222 // GPRs need to be popped in reverse order
223 for (int i = 15; i >= 0; i--) {
224 if (regs[i]) {
225 code.pop(IndexToReg64(i));
226 }
227 }
228}
229
230inline size_t ABI_PushRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
231 size_t rsp_alignment,
232 size_t needed_frame_size = 0) {
233 s32 subtraction, xmm_offset;
234 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
235
236 for (std::size_t i = 0; i < regs.size(); ++i) {
237 if (regs[i] && ABI_ALL_GPRS[i]) {
238 code.push(IndexToReg64(static_cast<int>(i)));
239 }
240 }
241
242 if (subtraction != 0) {
243 code.sub(code.rsp, subtraction);
244 }
245
246 return ABI_SHADOW_SPACE;
247}
248
249inline void ABI_PopRegistersAndAdjustStackGPS(Xbyak::CodeGenerator& code, std::bitset<32> regs,
250 size_t rsp_alignment, size_t needed_frame_size = 0) {
251 s32 subtraction, xmm_offset;
252 ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size, &subtraction, &xmm_offset);
253
254 if (subtraction != 0) {
255 code.add(code.rsp, subtraction);
256 }
257
258 // GPRs need to be popped in reverse order
259 for (int i = 15; i >= 0; i--) {
260 if (regs[i]) {
261 code.pop(IndexToReg64(i));
262 }
263 }
264}
265
266} // namespace Common::X64
diff --git a/src/common/x64/xbyak_util.h b/src/common/x64/xbyak_util.h
new file mode 100644
index 000000000..df17f8cbe
--- /dev/null
+++ b/src/common/x64/xbyak_util.h
@@ -0,0 +1,47 @@
1// Copyright 2016 Citra Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include <xbyak.h>
9#include "common/x64/xbyak_abi.h"
10
11namespace Common::X64 {
12
// Comparison predicates passed as the immediate operand of cmpps/cmpss.
// The values follow the SSE predicate encoding (0..7).
enum {
    CMP_EQ = 0,    // equal
    CMP_LT = 1,    // less than
    CMP_LE = 2,    // less than or equal
    CMP_UNORD = 3, // unordered
    CMP_NEQ = 4,   // not equal
    CMP_NLT = 5,   // not less than
    CMP_NLE = 6,   // not less than or equal
    CMP_ORD = 7,   // ordered
};
24
25constexpr bool IsWithin2G(uintptr_t ref, uintptr_t target) {
26 const u64 distance = target - (ref + 5);
27 return !(distance >= 0x8000'0000ULL && distance <= ~0x8000'0000ULL);
28}
29
30inline bool IsWithin2G(const Xbyak::CodeGenerator& code, uintptr_t target) {
31 return IsWithin2G(reinterpret_cast<uintptr_t>(code.getCurr()), target);
32}
33
34template <typename T>
35inline void CallFarFunction(Xbyak::CodeGenerator& code, const T f) {
36 static_assert(std::is_pointer_v<T>, "Argument must be a (function) pointer.");
37 size_t addr = reinterpret_cast<size_t>(f);
38 if (IsWithin2G(code, addr)) {
39 code.call(f);
40 } else {
41 // ABI_RETURN is a safe temp register to use before a call
42 code.mov(ABI_RETURN, addr);
43 code.call(ABI_RETURN);
44 }
45}
46
47} // namespace Common::X64
diff --git a/src/core/file_sys/control_metadata.cpp b/src/core/file_sys/control_metadata.cpp
index f155a1341..63cd2eead 100644
--- a/src/core/file_sys/control_metadata.cpp
+++ b/src/core/file_sys/control_metadata.cpp
@@ -95,6 +95,10 @@ u32 NACP::GetSupportedLanguages() const {
95 return raw.supported_languages; 95 return raw.supported_languages;
96} 96}
97 97
98u64 NACP::GetDeviceSaveDataSize() const {
99 return raw.device_save_data_size;
100}
101
98std::vector<u8> NACP::GetRawBytes() const { 102std::vector<u8> NACP::GetRawBytes() const {
99 std::vector<u8> out(sizeof(RawNACP)); 103 std::vector<u8> out(sizeof(RawNACP));
100 std::memcpy(out.data(), &raw, sizeof(RawNACP)); 104 std::memcpy(out.data(), &raw, sizeof(RawNACP));
diff --git a/src/core/file_sys/control_metadata.h b/src/core/file_sys/control_metadata.h
index 2d8c251ac..e37b2fadf 100644
--- a/src/core/file_sys/control_metadata.h
+++ b/src/core/file_sys/control_metadata.h
@@ -113,6 +113,7 @@ public:
113 u32 GetSupportedLanguages() const; 113 u32 GetSupportedLanguages() const;
114 std::vector<u8> GetRawBytes() const; 114 std::vector<u8> GetRawBytes() const;
115 bool GetUserAccountSwitchLock() const; 115 bool GetUserAccountSwitchLock() const;
116 u64 GetDeviceSaveDataSize() const;
116 117
117private: 118private:
118 RawNACP raw{}; 119 RawNACP raw{};
diff --git a/src/core/file_sys/patch_manager.cpp b/src/core/file_sys/patch_manager.cpp
index b93aa6935..c47ff863e 100644
--- a/src/core/file_sys/patch_manager.cpp
+++ b/src/core/file_sys/patch_manager.cpp
@@ -10,6 +10,7 @@
10#include "common/file_util.h" 10#include "common/file_util.h"
11#include "common/hex_util.h" 11#include "common/hex_util.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/string_util.h"
13#include "core/core.h" 14#include "core/core.h"
14#include "core/file_sys/content_archive.h" 15#include "core/file_sys/content_archive.h"
15#include "core/file_sys/control_metadata.h" 16#include "core/file_sys/control_metadata.h"
@@ -48,6 +49,23 @@ std::string FormatTitleVersion(u32 version, TitleVersionFormat format) {
48 return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]); 49 return fmt::format("v{}.{}.{}", bytes[3], bytes[2], bytes[1]);
49} 50}
50 51
52std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
53 std::string_view name) {
54#ifdef _WIN32
55 return dir->GetSubdirectory(name);
56#else
57 const auto subdirs = dir->GetSubdirectories();
58 for (const auto& subdir : subdirs) {
59 std::string dir_name = Common::ToLower(subdir->GetName());
60 if (dir_name == name) {
61 return subdir;
62 }
63 }
64
65 return nullptr;
66#endif
67}
68
51PatchManager::PatchManager(u64 title_id) : title_id(title_id) {} 69PatchManager::PatchManager(u64 title_id) : title_id(title_id) {}
52 70
53PatchManager::~PatchManager() = default; 71PatchManager::~PatchManager() = default;
@@ -104,7 +122,7 @@ VirtualDir PatchManager::PatchExeFS(VirtualDir exefs) const {
104 if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end()) 122 if (std::find(disabled.begin(), disabled.end(), subdir->GetName()) != disabled.end())
105 continue; 123 continue;
106 124
107 auto exefs_dir = subdir->GetSubdirectory("exefs"); 125 auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
108 if (exefs_dir != nullptr) 126 if (exefs_dir != nullptr)
109 layers.push_back(std::move(exefs_dir)); 127 layers.push_back(std::move(exefs_dir));
110 } 128 }
@@ -130,7 +148,7 @@ std::vector<VirtualFile> PatchManager::CollectPatches(const std::vector<VirtualD
130 if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend()) 148 if (std::find(disabled.cbegin(), disabled.cend(), subdir->GetName()) != disabled.cend())
131 continue; 149 continue;
132 150
133 auto exefs_dir = subdir->GetSubdirectory("exefs"); 151 auto exefs_dir = FindSubdirectoryCaseless(subdir, "exefs");
134 if (exefs_dir != nullptr) { 152 if (exefs_dir != nullptr) {
135 for (const auto& file : exefs_dir->GetFiles()) { 153 for (const auto& file : exefs_dir->GetFiles()) {
136 if (file->GetExtension() == "ips") { 154 if (file->GetExtension() == "ips") {
@@ -295,7 +313,7 @@ std::vector<Core::Memory::CheatEntry> PatchManager::CreateCheatList(
295 continue; 313 continue;
296 } 314 }
297 315
298 auto cheats_dir = subdir->GetSubdirectory("cheats"); 316 auto cheats_dir = FindSubdirectoryCaseless(subdir, "cheats");
299 if (cheats_dir != nullptr) { 317 if (cheats_dir != nullptr) {
300 auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true); 318 auto res = ReadCheatFileFromFolder(system, title_id, build_id_, cheats_dir, true);
301 if (res.has_value()) { 319 if (res.has_value()) {
@@ -340,11 +358,11 @@ static void ApplyLayeredFS(VirtualFile& romfs, u64 title_id, ContentRecordType t
340 continue; 358 continue;
341 } 359 }
342 360
343 auto romfs_dir = subdir->GetSubdirectory("romfs"); 361 auto romfs_dir = FindSubdirectoryCaseless(subdir, "romfs");
344 if (romfs_dir != nullptr) 362 if (romfs_dir != nullptr)
345 layers.push_back(std::move(romfs_dir)); 363 layers.push_back(std::move(romfs_dir));
346 364
347 auto ext_dir = subdir->GetSubdirectory("romfs_ext"); 365 auto ext_dir = FindSubdirectoryCaseless(subdir, "romfs_ext");
348 if (ext_dir != nullptr) 366 if (ext_dir != nullptr)
349 layers_ext.push_back(std::move(ext_dir)); 367 layers_ext.push_back(std::move(ext_dir));
350 } 368 }
@@ -470,7 +488,7 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
470 for (const auto& mod : mod_dir->GetSubdirectories()) { 488 for (const auto& mod : mod_dir->GetSubdirectories()) {
471 std::string types; 489 std::string types;
472 490
473 const auto exefs_dir = mod->GetSubdirectory("exefs"); 491 const auto exefs_dir = FindSubdirectoryCaseless(mod, "exefs");
474 if (IsDirValidAndNonEmpty(exefs_dir)) { 492 if (IsDirValidAndNonEmpty(exefs_dir)) {
475 bool ips = false; 493 bool ips = false;
476 bool ipswitch = false; 494 bool ipswitch = false;
@@ -494,9 +512,9 @@ std::map<std::string, std::string, std::less<>> PatchManager::GetPatchVersionNam
494 if (layeredfs) 512 if (layeredfs)
495 AppendCommaIfNotEmpty(types, "LayeredExeFS"); 513 AppendCommaIfNotEmpty(types, "LayeredExeFS");
496 } 514 }
497 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("romfs"))) 515 if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "romfs")))
498 AppendCommaIfNotEmpty(types, "LayeredFS"); 516 AppendCommaIfNotEmpty(types, "LayeredFS");
499 if (IsDirValidAndNonEmpty(mod->GetSubdirectory("cheats"))) 517 if (IsDirValidAndNonEmpty(FindSubdirectoryCaseless(mod, "cheats")))
500 AppendCommaIfNotEmpty(types, "Cheats"); 518 AppendCommaIfNotEmpty(types, "Cheats");
501 519
502 if (types.empty()) 520 if (types.empty())
diff --git a/src/core/file_sys/patch_manager.h b/src/core/file_sys/patch_manager.h
index ec6db524d..f4cb918dd 100644
--- a/src/core/file_sys/patch_manager.h
+++ b/src/core/file_sys/patch_manager.h
@@ -29,6 +29,11 @@ enum class TitleVersionFormat : u8 {
29std::string FormatTitleVersion(u32 version, 29std::string FormatTitleVersion(u32 version,
30 TitleVersionFormat format = TitleVersionFormat::ThreeElements); 30 TitleVersionFormat format = TitleVersionFormat::ThreeElements);
31 31
32// Returns a directory with name matching name case-insensitive. Returns nullptr if directory
33// doesn't have a directory with name.
34std::shared_ptr<VfsDirectory> FindSubdirectoryCaseless(const std::shared_ptr<VfsDirectory> dir,
35 std::string_view name);
36
32// A centralized class to manage patches to games. 37// A centralized class to manage patches to games.
33class PatchManager { 38class PatchManager {
34public: 39public:
diff --git a/src/core/file_sys/savedata_factory.cpp b/src/core/file_sys/savedata_factory.cpp
index f3def93ab..adfd2c1a4 100644
--- a/src/core/file_sys/savedata_factory.cpp
+++ b/src/core/file_sys/savedata_factory.cpp
@@ -57,7 +57,8 @@ void PrintSaveDataDescriptorWarnings(SaveDataDescriptor meta) {
57bool ShouldSaveDataBeAutomaticallyCreated(SaveDataSpaceId space, const SaveDataDescriptor& desc) { 57bool ShouldSaveDataBeAutomaticallyCreated(SaveDataSpaceId space, const SaveDataDescriptor& desc) {
58 return desc.type == SaveDataType::CacheStorage || desc.type == SaveDataType::TemporaryStorage || 58 return desc.type == SaveDataType::CacheStorage || desc.type == SaveDataType::TemporaryStorage ||
59 (space == SaveDataSpaceId::NandUser && ///< Normal Save Data -- Current Title & User 59 (space == SaveDataSpaceId::NandUser && ///< Normal Save Data -- Current Title & User
60 desc.type == SaveDataType::SaveData && desc.title_id == 0 && desc.save_id == 0); 60 (desc.type == SaveDataType::SaveData || desc.type == SaveDataType::DeviceSaveData) &&
61 desc.title_id == 0 && desc.save_id == 0);
61} 62}
62 63
63} // Anonymous namespace 64} // Anonymous namespace
@@ -139,8 +140,10 @@ std::string SaveDataFactory::GetFullPath(SaveDataSpaceId space, SaveDataType typ
139 u128 user_id, u64 save_id) { 140 u128 user_id, u64 save_id) {
140 // According to switchbrew, if a save is of type SaveData and the title id field is 0, it should 141 // According to switchbrew, if a save is of type SaveData and the title id field is 0, it should
141 // be interpreted as the title id of the current process. 142 // be interpreted as the title id of the current process.
142 if (type == SaveDataType::SaveData && title_id == 0) { 143 if (type == SaveDataType::SaveData || type == SaveDataType::DeviceSaveData) {
143 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); 144 if (title_id == 0) {
145 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
146 }
144 } 147 }
145 148
146 std::string out = GetSaveDataSpaceIdPath(space); 149 std::string out = GetSaveDataSpaceIdPath(space);
diff --git a/src/core/file_sys/system_archive/system_version.cpp b/src/core/file_sys/system_archive/system_version.cpp
index 6e22f97b0..aa313de66 100644
--- a/src/core/file_sys/system_archive/system_version.cpp
+++ b/src/core/file_sys/system_archive/system_version.cpp
@@ -12,17 +12,17 @@ namespace SystemVersionData {
12// This section should reflect the best system version to describe yuzu's HLE api. 12// This section should reflect the best system version to describe yuzu's HLE api.
13// TODO(DarkLordZach): Update when HLE gets better. 13// TODO(DarkLordZach): Update when HLE gets better.
14 14
15constexpr u8 VERSION_MAJOR = 5; 15constexpr u8 VERSION_MAJOR = 10;
16constexpr u8 VERSION_MINOR = 1; 16constexpr u8 VERSION_MINOR = 0;
17constexpr u8 VERSION_MICRO = 0; 17constexpr u8 VERSION_MICRO = 2;
18 18
19constexpr u8 REVISION_MAJOR = 3; 19constexpr u8 REVISION_MAJOR = 1;
20constexpr u8 REVISION_MINOR = 0; 20constexpr u8 REVISION_MINOR = 0;
21 21
22constexpr char PLATFORM_STRING[] = "NX"; 22constexpr char PLATFORM_STRING[] = "NX";
23constexpr char VERSION_HASH[] = "23f9df53e25709d756e0c76effcb2473bd3447dd"; 23constexpr char VERSION_HASH[] = "f90143fa8bbc061d4f68c35f95f04f8080c0ecdc";
24constexpr char DISPLAY_VERSION[] = "5.1.0"; 24constexpr char DISPLAY_VERSION[] = "10.0.2";
25constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 5.1.0-3.0"; 25constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 10.0.2-1.0";
26 26
27} // namespace SystemVersionData 27} // namespace SystemVersionData
28 28
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index eda466a5d..9a081fbd4 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -46,7 +46,7 @@ private:
46EmuWindow::EmuWindow() { 46EmuWindow::EmuWindow() {
47 // TODO: Find a better place to set this. 47 // TODO: Find a better place to set this.
48 config.min_client_area_size = 48 config.min_client_area_size =
49 std::make_pair(Layout::ScreenUndocked::Width, Layout::ScreenUndocked::Height); 49 std::make_pair(Layout::MinimumSize::Width, Layout::MinimumSize::Height);
50 active_config = config; 50 active_config = config;
51 touch_state = std::make_shared<TouchState>(); 51 touch_state = std::make_shared<TouchState>();
52 Input::RegisterFactory<Input::TouchDevice>("emu_window", touch_state); 52 Input::RegisterFactory<Input::TouchDevice>("emu_window", touch_state);
diff --git a/src/core/frontend/framebuffer_layout.h b/src/core/frontend/framebuffer_layout.h
index 15ecfb13d..91ecc30ab 100644
--- a/src/core/frontend/framebuffer_layout.h
+++ b/src/core/frontend/framebuffer_layout.h
@@ -8,6 +8,11 @@
8 8
9namespace Layout { 9namespace Layout {
10 10
11namespace MinimumSize {
12constexpr u32 Width = 640;
13constexpr u32 Height = 360;
14} // namespace MinimumSize
15
11namespace ScreenUndocked { 16namespace ScreenUndocked {
12constexpr u32 Width = 1280; 17constexpr u32 Width = 1280;
13constexpr u32 Height = 720; 18constexpr u32 Height = 720;
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index f6503fe2f..20c331b77 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -767,7 +767,7 @@ FSP_SRV::FSP_SRV(FileSystemController& fsc, const Core::Reporter& reporter)
767 {1014, nullptr, "OutputMultiProgramTagAccessLog"}, 767 {1014, nullptr, "OutputMultiProgramTagAccessLog"},
768 {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"}, 768 {1100, nullptr, "OverrideSaveDataTransferTokenSignVerificationKey"},
769 {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"}, 769 {1110, nullptr, "CorruptSaveDataFileSystemBySaveDataSpaceId2"},
770 {1200, nullptr, "OpenMultiCommitManager"}, 770 {1200, &FSP_SRV::OpenMultiCommitManager, "OpenMultiCommitManager"},
771 {1300, nullptr, "OpenBisWiper"}, 771 {1300, nullptr, "OpenBisWiper"},
772 }; 772 };
773 // clang-format on 773 // clang-format on
@@ -988,4 +988,40 @@ void FSP_SRV::GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx) {
988 rb.Push(access_log_program_index); 988 rb.Push(access_log_program_index);
989} 989}
990 990
991class IMultiCommitManager final : public ServiceFramework<IMultiCommitManager> {
992public:
993 explicit IMultiCommitManager() : ServiceFramework("IMultiCommitManager") {
994 static const FunctionInfo functions[] = {
995 {1, &IMultiCommitManager::Add, "Add"},
996 {2, &IMultiCommitManager::Commit, "Commit"},
997 };
998 RegisterHandlers(functions);
999 }
1000
1001private:
1002 FileSys::VirtualFile backend;
1003
1004 void Add(Kernel::HLERequestContext& ctx) {
1005 LOG_WARNING(Service_FS, "(STUBBED) called");
1006
1007 IPC::ResponseBuilder rb{ctx, 2};
1008 rb.Push(RESULT_SUCCESS);
1009 }
1010
1011 void Commit(Kernel::HLERequestContext& ctx) {
1012 LOG_WARNING(Service_FS, "(STUBBED) called");
1013
1014 IPC::ResponseBuilder rb{ctx, 2};
1015 rb.Push(RESULT_SUCCESS);
1016 }
1017};
1018
1019void FSP_SRV::OpenMultiCommitManager(Kernel::HLERequestContext& ctx) {
1020 LOG_DEBUG(Service_FS, "called");
1021
1022 IPC::ResponseBuilder rb{ctx, 2, 0, 1};
1023 rb.Push(RESULT_SUCCESS);
1024 rb.PushIpcInterface<IMultiCommitManager>(std::make_shared<IMultiCommitManager>());
1025}
1026
991} // namespace Service::FileSystem 1027} // namespace Service::FileSystem
diff --git a/src/core/hle/service/filesystem/fsp_srv.h b/src/core/hle/service/filesystem/fsp_srv.h
index d52b55999..dfb3e395b 100644
--- a/src/core/hle/service/filesystem/fsp_srv.h
+++ b/src/core/hle/service/filesystem/fsp_srv.h
@@ -50,6 +50,7 @@ private:
50 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx); 50 void OpenPatchDataStorageByCurrentProcess(Kernel::HLERequestContext& ctx);
51 void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx); 51 void OutputAccessLogToSdCard(Kernel::HLERequestContext& ctx);
52 void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx); 52 void GetAccessLogVersionInfo(Kernel::HLERequestContext& ctx);
53 void OpenMultiCommitManager(Kernel::HLERequestContext& ctx);
53 54
54 FileSystemController& fsc; 55 FileSystemController& fsc;
55 56
diff --git a/src/core/hle/service/hid/controllers/keyboard.cpp b/src/core/hle/service/hid/controllers/keyboard.cpp
index 358cb9329..9a8d354ba 100644
--- a/src/core/hle/service/hid/controllers/keyboard.cpp
+++ b/src/core/hle/service/hid/controllers/keyboard.cpp
@@ -38,10 +38,11 @@ void Controller_Keyboard::OnUpdate(const Core::Timing::CoreTiming& core_timing,
38 cur_entry.sampling_number = last_entry.sampling_number + 1; 38 cur_entry.sampling_number = last_entry.sampling_number + 1;
39 cur_entry.sampling_number2 = cur_entry.sampling_number; 39 cur_entry.sampling_number2 = cur_entry.sampling_number;
40 40
41 cur_entry.key.fill(0);
42 cur_entry.modifier = 0;
43
41 for (std::size_t i = 0; i < keyboard_keys.size(); ++i) { 44 for (std::size_t i = 0; i < keyboard_keys.size(); ++i) {
42 for (std::size_t k = 0; k < KEYS_PER_BYTE; ++k) { 45 cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << (i % KEYS_PER_BYTE));
43 cur_entry.key[i / KEYS_PER_BYTE] |= (keyboard_keys[i]->GetStatus() << k);
44 }
45 } 46 }
46 47
47 for (std::size_t i = 0; i < keyboard_mods.size(); ++i) { 48 for (std::size_t i = 0; i < keyboard_mods.size(); ++i) {
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index 767158444..01ddcdbd6 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -177,7 +177,8 @@ private:
177 void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) { 177 void CreateTemporaryNetworkProfile(Kernel::HLERequestContext& ctx) {
178 LOG_DEBUG(Service_NIFM, "called"); 178 LOG_DEBUG(Service_NIFM, "called");
179 179
180 ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c, "NetworkProfileData is not the correct size"); 180 ASSERT_MSG(ctx.GetReadBufferSize() == 0x17c,
181 "SfNetworkProfileData is not the correct size");
181 u128 uuid{}; 182 u128 uuid{};
182 auto buffer = ctx.ReadBuffer(); 183 auto buffer = ctx.ReadBuffer();
183 std::memcpy(&uuid, buffer.data() + 8, sizeof(u128)); 184 std::memcpy(&uuid, buffer.data() + 8, sizeof(u128));
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index f1e3d832a..caca80dde 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -138,9 +138,7 @@ u32 BufferQueue::Query(QueryType type) {
138 138
139 switch (type) { 139 switch (type) {
140 case QueryType::NativeWindowFormat: 140 case QueryType::NativeWindowFormat:
141 // TODO(Subv): Use an enum for this 141 return static_cast<u32>(PixelFormat::RGBA8888);
142 static constexpr u32 FormatABGR8 = 1;
143 return FormatABGR8;
144 } 142 }
145 143
146 UNIMPLEMENTED(); 144 UNIMPLEMENTED();
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index d5f31e567..8a837e5aa 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -66,6 +66,16 @@ public:
66 Rotate270 = 0x07, 66 Rotate270 = 0x07,
67 }; 67 };
68 68
69 enum class PixelFormat : u32 {
70 RGBA8888 = 1,
71 RGBX8888 = 2,
72 RGB888 = 3,
73 RGB565 = 4,
74 BGRA8888 = 5,
75 RGBA5551 = 6,
76 RRGBA4444 = 7,
77 };
78
69 struct Buffer { 79 struct Buffer {
70 enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 }; 80 enum class Status { Free = 0, Queued = 1, Dequeued = 2, Acquired = 3 };
71 81
diff --git a/src/core/hle/service/time/time_manager.cpp b/src/core/hle/service/time/time_manager.cpp
index 9d6c55865..b4dfe45e5 100644
--- a/src/core/hle/service/time/time_manager.cpp
+++ b/src/core/hle/service/time/time_manager.cpp
@@ -5,6 +5,7 @@
5#include <chrono> 5#include <chrono>
6#include <ctime> 6#include <ctime>
7 7
8#include "common/time_zone.h"
8#include "core/hle/service/time/ephemeral_network_system_clock_context_writer.h" 9#include "core/hle/service/time/ephemeral_network_system_clock_context_writer.h"
9#include "core/hle/service/time/local_system_clock_context_writer.h" 10#include "core/hle/service/time/local_system_clock_context_writer.h"
10#include "core/hle/service/time/network_system_clock_context_writer.h" 11#include "core/hle/service/time/network_system_clock_context_writer.h"
@@ -21,8 +22,16 @@ static std::chrono::seconds GetSecondsSinceEpoch() {
21 Settings::values.custom_rtc_differential; 22 Settings::values.custom_rtc_differential;
22} 23}
23 24
25static s64 GetExternalTimeZoneOffset() {
26 // With "auto" timezone setting, we use the external system's timezone offset
27 if (Settings::GetTimeZoneString() == "auto") {
28 return Common::TimeZone::GetCurrentOffsetSeconds().count();
29 }
30 return 0;
31}
32
24static s64 GetExternalRtcValue() { 33static s64 GetExternalRtcValue() {
25 return GetSecondsSinceEpoch().count(); 34 return GetSecondsSinceEpoch().count() + GetExternalTimeZoneOffset();
26} 35}
27 36
28TimeManager::TimeManager(Core::System& system) 37TimeManager::TimeManager(Core::System& system)
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp
index 78d4acd95..c070d6e97 100644
--- a/src/core/hle/service/time/time_zone_content_manager.cpp
+++ b/src/core/hle/service/time/time_zone_content_manager.cpp
@@ -5,6 +5,7 @@
5#include <sstream> 5#include <sstream>
6 6
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/time_zone.h"
8#include "core/core.h" 9#include "core/core.h"
9#include "core/file_sys/content_archive.h" 10#include "core/file_sys/content_archive.h"
10#include "core/file_sys/nca_metadata.h" 11#include "core/file_sys/nca_metadata.h"
@@ -14,6 +15,7 @@
14#include "core/hle/service/filesystem/filesystem.h" 15#include "core/hle/service/filesystem/filesystem.h"
15#include "core/hle/service/time/time_manager.h" 16#include "core/hle/service/time/time_manager.h"
16#include "core/hle/service/time/time_zone_content_manager.h" 17#include "core/hle/service/time/time_zone_content_manager.h"
18#include "core/settings.h"
17 19
18namespace Service::Time::TimeZone { 20namespace Service::Time::TimeZone {
19 21
@@ -68,10 +70,22 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) {
68 70
69TimeZoneContentManager::TimeZoneContentManager(TimeManager& time_manager, Core::System& system) 71TimeZoneContentManager::TimeZoneContentManager(TimeManager& time_manager, Core::System& system)
70 : system{system}, location_name_cache{BuildLocationNameCache(system)} { 72 : system{system}, location_name_cache{BuildLocationNameCache(system)} {
71 if (FileSys::VirtualFile vfs_file; GetTimeZoneInfoFile("GMT", vfs_file) == RESULT_SUCCESS) { 73
74 std::string location_name;
75 const auto timezone_setting = Settings::GetTimeZoneString();
76 if (timezone_setting == "auto") {
77 location_name = Common::TimeZone::GetDefaultTimeZone();
78 } else if (timezone_setting == "default") {
79 location_name = location_name;
80 } else {
81 location_name = timezone_setting;
82 }
83
84 if (FileSys::VirtualFile vfs_file;
85 GetTimeZoneInfoFile(location_name, vfs_file) == RESULT_SUCCESS) {
72 const auto time_point{ 86 const auto time_point{
73 time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(system)}; 87 time_manager.GetStandardSteadyClockCore().GetCurrentTimePoint(system)};
74 time_manager.SetupTimeZoneManager("GMT", time_point, location_name_cache.size(), {}, 88 time_manager.SetupTimeZoneManager(location_name, time_point, location_name_cache.size(), {},
75 vfs_file); 89 vfs_file);
76 } else { 90 } else {
77 time_zone_manager.MarkAsInitialized(); 91 time_zone_manager.MarkAsInitialized();
@@ -114,6 +128,12 @@ ResultCode TimeZoneContentManager::GetTimeZoneInfoFile(const std::string& locati
114 128
115 vfs_file = zoneinfo_dir->GetFile(location_name); 129 vfs_file = zoneinfo_dir->GetFile(location_name);
116 if (!vfs_file) { 130 if (!vfs_file) {
131 LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"! Using default timezone.",
132 time_zone_binary_titleid, location_name);
133 vfs_file = zoneinfo_dir->GetFile(Common::TimeZone::GetDefaultTimeZone());
134 }
135
136 if (!vfs_file) {
117 LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"!", time_zone_binary_titleid, 137 LOG_ERROR(Service_Time, "{:016X} has no file \"{}\"!", time_zone_binary_titleid,
118 location_name); 138 location_name);
119 return ERROR_TIME_NOT_FOUND; 139 return ERROR_TIME_NOT_FOUND;
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index 2b0bdc4d3..4edff9cd8 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -63,6 +63,21 @@ const std::array<const char*, NumMouseButtons> mapping = {{
63 63
64Values values = {}; 64Values values = {};
65 65
66std::string GetTimeZoneString() {
67 static constexpr std::array<const char*, 46> timezones{{
68 "auto", "default", "CET", "CST6CDT", "Cuba", "EET", "Egypt", "Eire",
69 "EST", "EST5EDT", "GB", "GB-Eire", "GMT", "GMT+0", "GMT-0", "GMT0",
70 "Greenwich", "Hongkong", "HST", "Iceland", "Iran", "Israel", "Jamaica", "Japan",
71 "Kwajalein", "Libya", "MET", "MST", "MST7MDT", "Navajo", "NZ", "NZ-CHAT",
72 "Poland", "Portugal", "PRC", "PST8PDT", "ROC", "ROK", "Singapore", "Turkey",
73 "UCT", "Universal", "UTC", "W-SU", "WET", "Zulu",
74 }};
75
76 ASSERT(Settings::values.time_zone_index < timezones.size());
77
78 return timezones[Settings::values.time_zone_index];
79}
80
66void Apply() { 81void Apply() {
67 GDBStub::SetServerPort(values.gdbstub_port); 82 GDBStub::SetServerPort(values.gdbstub_port);
68 GDBStub::ToggleServer(values.use_gdbstub); 83 GDBStub::ToggleServer(values.use_gdbstub);
@@ -87,6 +102,7 @@ void LogSettings() {
87 LogSetting("System_CurrentUser", Settings::values.current_user); 102 LogSetting("System_CurrentUser", Settings::values.current_user);
88 LogSetting("System_LanguageIndex", Settings::values.language_index); 103 LogSetting("System_LanguageIndex", Settings::values.language_index);
89 LogSetting("System_RegionIndex", Settings::values.region_index); 104 LogSetting("System_RegionIndex", Settings::values.region_index);
105 LogSetting("System_TimeZoneIndex", Settings::values.time_zone_index);
90 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); 106 LogSetting("Core_UseMultiCore", Settings::values.use_multi_core);
91 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); 107 LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor);
92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 108 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
@@ -96,6 +112,7 @@ void LogSettings() {
96 LogSetting("Renderer_UseAsynchronousGpuEmulation", 112 LogSetting("Renderer_UseAsynchronousGpuEmulation",
97 Settings::values.use_asynchronous_gpu_emulation); 113 Settings::values.use_asynchronous_gpu_emulation);
98 LogSetting("Renderer_UseVsync", Settings::values.use_vsync); 114 LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
115 LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
99 LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy); 116 LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy);
100 LogSetting("Audio_OutputEngine", Settings::values.sink_id); 117 LogSetting("Audio_OutputEngine", Settings::values.sink_id);
101 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); 118 LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
diff --git a/src/core/settings.h b/src/core/settings.h
index 163900f0b..78eb33737 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -394,6 +394,7 @@ struct Values {
394 s32 current_user; 394 s32 current_user;
395 s32 language_index; 395 s32 language_index;
396 s32 region_index; 396 s32 region_index;
397 s32 time_zone_index;
397 s32 sound_index; 398 s32 sound_index;
398 399
399 // Controls 400 // Controls
@@ -445,6 +446,7 @@ struct Values {
445 GPUAccuracy gpu_accuracy; 446 GPUAccuracy gpu_accuracy;
446 bool use_asynchronous_gpu_emulation; 447 bool use_asynchronous_gpu_emulation;
447 bool use_vsync; 448 bool use_vsync;
449 bool use_assembly_shaders;
448 bool force_30fps_mode; 450 bool force_30fps_mode;
449 bool use_fast_gpu_time; 451 bool use_fast_gpu_time;
450 452
@@ -490,6 +492,9 @@ struct Values {
490bool IsGPULevelExtreme(); 492bool IsGPULevelExtreme();
491bool IsGPULevelHigh(); 493bool IsGPULevelHigh();
492 494
495std::string GetTimeZoneString();
496
493void Apply(); 497void Apply();
494void LogSettings(); 498void LogSettings();
499
495} // namespace Settings 500} // namespace Settings
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 1c3b03a1c..c781b3cfc 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -201,6 +201,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
201 AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", 201 AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
202 Settings::values.use_asynchronous_gpu_emulation); 202 Settings::values.use_asynchronous_gpu_emulation);
203 AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); 203 AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
204 AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
204 AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode); 205 AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
205} 206}
206 207
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d23c53843..d6ee82836 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,7 @@
1add_library(video_core STATIC 1add_library(video_core STATIC
2 buffer_cache/buffer_block.h 2 buffer_cache/buffer_block.h
3 buffer_cache/buffer_cache.h 3 buffer_cache/buffer_cache.h
4 buffer_cache/map_interval.cpp
4 buffer_cache/map_interval.h 5 buffer_cache/map_interval.h
5 dirty_flags.cpp 6 dirty_flags.cpp
6 dirty_flags.h 7 dirty_flags.h
@@ -228,7 +229,7 @@ endif()
228create_target_directory_groups(video_core) 229create_target_directory_groups(video_core)
229 230
230target_link_libraries(video_core PUBLIC common core) 231target_link_libraries(video_core PUBLIC common core)
231target_link_libraries(video_core PRIVATE glad) 232target_link_libraries(video_core PRIVATE glad xbyak)
232 233
233if (ENABLE_VULKAN) 234if (ENABLE_VULKAN)
234 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) 235 target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 56e570994..d9a4a1b4d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -12,11 +12,12 @@
12#include <utility> 12#include <utility>
13#include <vector> 13#include <vector>
14 14
15#include <boost/icl/interval_map.hpp> 15#include <boost/container/small_vector.hpp>
16#include <boost/icl/interval_set.hpp> 16#include <boost/icl/interval_set.hpp>
17#include <boost/range/iterator_range.hpp> 17#include <boost/intrusive/set.hpp>
18 18
19#include "common/alignment.h" 19#include "common/alignment.h"
20#include "common/assert.h"
20#include "common/common_types.h" 21#include "common/common_types.h"
21#include "common/logging/log.h" 22#include "common/logging/log.h"
22#include "core/core.h" 23#include "core/core.h"
@@ -29,10 +30,12 @@
29 30
30namespace VideoCommon { 31namespace VideoCommon {
31 32
32using MapInterval = std::shared_ptr<MapIntervalBase>;
33
34template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> 33template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
35class BufferCache { 34class BufferCache {
35 using IntervalSet = boost::icl::interval_set<VAddr>;
36 using IntervalType = typename IntervalSet::interval_type;
37 using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
38
36public: 39public:
37 using BufferInfo = std::pair<BufferType, u64>; 40 using BufferInfo = std::pair<BufferType, u64>;
38 41
@@ -40,14 +43,12 @@ public:
40 bool is_written = false, bool use_fast_cbuf = false) { 43 bool is_written = false, bool use_fast_cbuf = false) {
41 std::lock_guard lock{mutex}; 44 std::lock_guard lock{mutex};
42 45
43 const std::optional<VAddr> cpu_addr_opt = 46 const auto& memory_manager = system.GPU().MemoryManager();
44 system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); 47 const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
45
46 if (!cpu_addr_opt) { 48 if (!cpu_addr_opt) {
47 return {GetEmptyBuffer(size), 0}; 49 return {GetEmptyBuffer(size), 0};
48 } 50 }
49 51 const VAddr cpu_addr = *cpu_addr_opt;
50 VAddr cpu_addr = *cpu_addr_opt;
51 52
52 // Cache management is a big overhead, so only cache entries with a given size. 53 // Cache management is a big overhead, so only cache entries with a given size.
53 // TODO: Figure out which size is the best for given games. 54 // TODO: Figure out which size is the best for given games.
@@ -77,16 +78,19 @@ public:
77 } 78 }
78 } 79 }
79 80
80 auto block = GetBlock(cpu_addr, size); 81 OwnerBuffer block = GetBlock(cpu_addr, size);
81 auto map = MapAddress(block, gpu_addr, cpu_addr, size); 82 MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
83 if (!map) {
84 return {GetEmptyBuffer(size), 0};
85 }
82 if (is_written) { 86 if (is_written) {
83 map->MarkAsModified(true, GetModifiedTicks()); 87 map->MarkAsModified(true, GetModifiedTicks());
84 if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { 88 if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
85 MarkForAsyncFlush(map); 89 MarkForAsyncFlush(map);
86 } 90 }
87 if (!map->IsWritten()) { 91 if (!map->is_written) {
88 map->MarkAsWritten(true); 92 map->is_written = true;
89 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 93 MarkRegionAsWritten(map->start, map->end - 1);
90 } 94 }
91 } 95 }
92 96
@@ -132,12 +136,11 @@ public:
132 void FlushRegion(VAddr addr, std::size_t size) { 136 void FlushRegion(VAddr addr, std::size_t size) {
133 std::lock_guard lock{mutex}; 137 std::lock_guard lock{mutex};
134 138
135 std::vector<MapInterval> objects = GetMapsInRange(addr, size); 139 VectorMapInterval objects = GetMapsInRange(addr, size);
136 std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { 140 std::sort(objects.begin(), objects.end(),
137 return a->GetModificationTick() < b->GetModificationTick(); 141 [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
138 }); 142 for (MapInterval* object : objects) {
139 for (auto& object : objects) { 143 if (object->is_modified && object->is_registered) {
140 if (object->IsModified() && object->IsRegistered()) {
141 mutex.unlock(); 144 mutex.unlock();
142 FlushMap(object); 145 FlushMap(object);
143 mutex.lock(); 146 mutex.lock();
@@ -148,9 +151,9 @@ public:
148 bool MustFlushRegion(VAddr addr, std::size_t size) { 151 bool MustFlushRegion(VAddr addr, std::size_t size) {
149 std::lock_guard lock{mutex}; 152 std::lock_guard lock{mutex};
150 153
151 const std::vector<MapInterval> objects = GetMapsInRange(addr, size); 154 const VectorMapInterval objects = GetMapsInRange(addr, size);
152 return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { 155 return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
153 return map->IsModified() && map->IsRegistered(); 156 return map->is_modified && map->is_registered;
154 }); 157 });
155 } 158 }
156 159
@@ -158,9 +161,8 @@ public:
158 void InvalidateRegion(VAddr addr, u64 size) { 161 void InvalidateRegion(VAddr addr, u64 size) {
159 std::lock_guard lock{mutex}; 162 std::lock_guard lock{mutex};
160 163
161 std::vector<MapInterval> objects = GetMapsInRange(addr, size); 164 for (auto& object : GetMapsInRange(addr, size)) {
162 for (auto& object : objects) { 165 if (object->is_registered) {
163 if (object->IsRegistered()) {
164 Unregister(object); 166 Unregister(object);
165 } 167 }
166 } 168 }
@@ -169,10 +171,10 @@ public:
169 void OnCPUWrite(VAddr addr, std::size_t size) { 171 void OnCPUWrite(VAddr addr, std::size_t size) {
170 std::lock_guard lock{mutex}; 172 std::lock_guard lock{mutex};
171 173
172 for (const auto& object : GetMapsInRange(addr, size)) { 174 for (MapInterval* object : GetMapsInRange(addr, size)) {
173 if (object->IsMemoryMarked() && object->IsRegistered()) { 175 if (object->is_memory_marked && object->is_registered) {
174 UnmarkMemory(object); 176 UnmarkMemory(object);
175 object->SetSyncPending(true); 177 object->is_sync_pending = true;
176 marked_for_unregister.emplace_back(object); 178 marked_for_unregister.emplace_back(object);
177 } 179 }
178 } 180 }
@@ -181,9 +183,9 @@ public:
181 void SyncGuestHost() { 183 void SyncGuestHost() {
182 std::lock_guard lock{mutex}; 184 std::lock_guard lock{mutex};
183 185
184 for (const auto& object : marked_for_unregister) { 186 for (auto& object : marked_for_unregister) {
185 if (object->IsRegistered()) { 187 if (object->is_registered) {
186 object->SetSyncPending(false); 188 object->is_sync_pending = false;
187 Unregister(object); 189 Unregister(object);
188 } 190 }
189 } 191 }
@@ -192,9 +194,9 @@ public:
192 194
193 void CommitAsyncFlushes() { 195 void CommitAsyncFlushes() {
194 if (uncommitted_flushes) { 196 if (uncommitted_flushes) {
195 auto commit_list = std::make_shared<std::list<MapInterval>>(); 197 auto commit_list = std::make_shared<std::list<MapInterval*>>();
196 for (auto& map : *uncommitted_flushes) { 198 for (MapInterval* map : *uncommitted_flushes) {
197 if (map->IsRegistered() && map->IsModified()) { 199 if (map->is_registered && map->is_modified) {
198 // TODO(Blinkhawk): Implement backend asynchronous flushing 200 // TODO(Blinkhawk): Implement backend asynchronous flushing
199 // AsyncFlushMap(map) 201 // AsyncFlushMap(map)
200 commit_list->push_back(map); 202 commit_list->push_back(map);
@@ -228,8 +230,8 @@ public:
228 committed_flushes.pop_front(); 230 committed_flushes.pop_front();
229 return; 231 return;
230 } 232 }
231 for (MapInterval& map : *flush_list) { 233 for (MapInterval* map : *flush_list) {
232 if (map->IsRegistered()) { 234 if (map->is_registered) {
233 // TODO(Blinkhawk): Replace this for reading the asynchronous flush 235 // TODO(Blinkhawk): Replace this for reading the asynchronous flush
234 FlushMap(map); 236 FlushMap(map);
235 } 237 }
@@ -265,61 +267,60 @@ protected:
265 } 267 }
266 268
267 /// Register an object into the cache 269 /// Register an object into the cache
268 void Register(const MapInterval& new_map, bool inherit_written = false) { 270 MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
269 const VAddr cpu_addr = new_map->GetStart(); 271 const VAddr cpu_addr = new_map.start;
270 if (!cpu_addr) { 272 if (!cpu_addr) {
271 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", 273 LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
272 new_map->GetGpuAddress()); 274 new_map.gpu_addr);
273 return; 275 return nullptr;
274 } 276 }
275 const std::size_t size = new_map->GetEnd() - new_map->GetStart(); 277 const std::size_t size = new_map.end - new_map.start;
276 new_map->MarkAsRegistered(true); 278 new_map.is_registered = true;
277 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
278 mapped_addresses.insert({interval, new_map});
279 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); 279 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
280 new_map->SetMemoryMarked(true); 280 new_map.is_memory_marked = true;
281 if (inherit_written) { 281 if (inherit_written) {
282 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); 282 MarkRegionAsWritten(new_map.start, new_map.end - 1);
283 new_map->MarkAsWritten(true); 283 new_map.is_written = true;
284 } 284 }
285 MapInterval* const storage = mapped_addresses_allocator.Allocate();
286 *storage = new_map;
287 mapped_addresses.insert(*storage);
288 return storage;
285 } 289 }
286 290
287 void UnmarkMemory(const MapInterval& map) { 291 void UnmarkMemory(MapInterval* map) {
288 if (!map->IsMemoryMarked()) { 292 if (!map->is_memory_marked) {
289 return; 293 return;
290 } 294 }
291 const std::size_t size = map->GetEnd() - map->GetStart(); 295 const std::size_t size = map->end - map->start;
292 rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); 296 rasterizer.UpdatePagesCachedCount(map->start, size, -1);
293 map->SetMemoryMarked(false); 297 map->is_memory_marked = false;
294 } 298 }
295 299
296 /// Unregisters an object from the cache 300 /// Unregisters an object from the cache
297 void Unregister(const MapInterval& map) { 301 void Unregister(MapInterval* map) {
298 UnmarkMemory(map); 302 UnmarkMemory(map);
299 map->MarkAsRegistered(false); 303 map->is_registered = false;
300 if (map->IsSyncPending()) { 304 if (map->is_sync_pending) {
305 map->is_sync_pending = false;
301 marked_for_unregister.remove(map); 306 marked_for_unregister.remove(map);
302 map->SetSyncPending(false);
303 } 307 }
304 if (map->IsWritten()) { 308 if (map->is_written) {
305 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 309 UnmarkRegionAsWritten(map->start, map->end - 1);
306 } 310 }
307 const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; 311 const auto it = mapped_addresses.find(*map);
308 mapped_addresses.erase(delete_interval); 312 ASSERT(it != mapped_addresses.end());
313 mapped_addresses.erase(it);
314 mapped_addresses_allocator.Release(map);
309 } 315 }
310 316
311private: 317private:
312 MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { 318 MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
313 return std::make_shared<MapIntervalBase>(start, end, gpu_addr); 319 std::size_t size) {
314 } 320 const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
315
316 MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
317 const std::size_t size) {
318 std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
319 if (overlaps.empty()) { 321 if (overlaps.empty()) {
320 auto& memory_manager = system.GPU().MemoryManager(); 322 auto& memory_manager = system.GPU().MemoryManager();
321 const VAddr cpu_addr_end = cpu_addr + size; 323 const VAddr cpu_addr_end = cpu_addr + size;
322 MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
323 if (memory_manager.IsGranularRange(gpu_addr, size)) { 324 if (memory_manager.IsGranularRange(gpu_addr, size)) {
324 u8* host_ptr = memory_manager.GetPointer(gpu_addr); 325 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
325 UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); 326 UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
@@ -328,13 +329,12 @@ private:
328 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); 329 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
329 UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); 330 UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
330 } 331 }
331 Register(new_map); 332 return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
332 return new_map;
333 } 333 }
334 334
335 const VAddr cpu_addr_end = cpu_addr + size; 335 const VAddr cpu_addr_end = cpu_addr + size;
336 if (overlaps.size() == 1) { 336 if (overlaps.size() == 1) {
337 MapInterval& current_map = overlaps[0]; 337 MapInterval* const current_map = overlaps[0];
338 if (current_map->IsInside(cpu_addr, cpu_addr_end)) { 338 if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
339 return current_map; 339 return current_map;
340 } 340 }
@@ -344,35 +344,39 @@ private:
344 bool write_inheritance = false; 344 bool write_inheritance = false;
345 bool modified_inheritance = false; 345 bool modified_inheritance = false;
346 // Calculate new buffer parameters 346 // Calculate new buffer parameters
347 for (auto& overlap : overlaps) { 347 for (MapInterval* overlap : overlaps) {
348 new_start = std::min(overlap->GetStart(), new_start); 348 new_start = std::min(overlap->start, new_start);
349 new_end = std::max(overlap->GetEnd(), new_end); 349 new_end = std::max(overlap->end, new_end);
350 write_inheritance |= overlap->IsWritten(); 350 write_inheritance |= overlap->is_written;
351 modified_inheritance |= overlap->IsModified(); 351 modified_inheritance |= overlap->is_modified;
352 } 352 }
353 GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; 353 GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
354 for (auto& overlap : overlaps) { 354 for (auto& overlap : overlaps) {
355 Unregister(overlap); 355 Unregister(overlap);
356 } 356 }
357 UpdateBlock(block, new_start, new_end, overlaps); 357 UpdateBlock(block, new_start, new_end, overlaps);
358 MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); 358
359 const MapInterval new_map{new_start, new_end, new_gpu_addr};
360 MapInterval* const map = Register(new_map, write_inheritance);
361 if (!map) {
362 return nullptr;
363 }
359 if (modified_inheritance) { 364 if (modified_inheritance) {
360 new_map->MarkAsModified(true, GetModifiedTicks()); 365 map->MarkAsModified(true, GetModifiedTicks());
361 if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { 366 if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
362 MarkForAsyncFlush(new_map); 367 MarkForAsyncFlush(map);
363 } 368 }
364 } 369 }
365 Register(new_map, write_inheritance); 370 return map;
366 return new_map;
367 } 371 }
368 372
369 void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, 373 void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
370 std::vector<MapInterval>& overlaps) { 374 const VectorMapInterval& overlaps) {
371 const IntervalType base_interval{start, end}; 375 const IntervalType base_interval{start, end};
372 IntervalSet interval_set{}; 376 IntervalSet interval_set{};
373 interval_set.add(base_interval); 377 interval_set.add(base_interval);
374 for (auto& overlap : overlaps) { 378 for (auto& overlap : overlaps) {
375 const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; 379 const IntervalType subtract{overlap->start, overlap->end};
376 interval_set.subtract(subtract); 380 interval_set.subtract(subtract);
377 } 381 }
378 for (auto& interval : interval_set) { 382 for (auto& interval : interval_set) {
@@ -386,18 +390,24 @@ private:
386 } 390 }
387 } 391 }
388 392
389 std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { 393 VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
394 VectorMapInterval result;
390 if (size == 0) { 395 if (size == 0) {
391 return {}; 396 return result;
392 } 397 }
393 398
394 std::vector<MapInterval> objects{}; 399 const VAddr addr_end = addr + size;
395 const IntervalType interval{addr, addr + size}; 400 auto it = mapped_addresses.lower_bound(addr);
396 for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { 401 if (it != mapped_addresses.begin()) {
397 objects.push_back(pair.second); 402 --it;
398 } 403 }
399 404 while (it != mapped_addresses.end() && it->start < addr_end) {
400 return objects; 405 if (it->Overlaps(addr, addr_end)) {
406 result.push_back(&*it);
407 }
408 ++it;
409 }
410 return result;
401 } 411 }
402 412
403 /// Returns a ticks counter used for tracking when cached objects were last modified 413 /// Returns a ticks counter used for tracking when cached objects were last modified
@@ -405,12 +415,12 @@ private:
405 return ++modified_ticks; 415 return ++modified_ticks;
406 } 416 }
407 417
408 void FlushMap(MapInterval map) { 418 void FlushMap(MapInterval* map) {
409 std::size_t size = map->GetEnd() - map->GetStart(); 419 const std::size_t size = map->end - map->start;
410 OwnerBuffer block = blocks[map->GetStart() >> block_page_bits]; 420 OwnerBuffer block = blocks[map->start >> block_page_bits];
411 staging_buffer.resize(size); 421 staging_buffer.resize(size);
412 DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); 422 DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
413 system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); 423 system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
414 map->MarkAsModified(false, 0); 424 map->MarkAsModified(false, 0);
415 } 425 }
416 426
@@ -515,7 +525,7 @@ private:
515 } else { 525 } else {
516 written_pages[page_start] = 1; 526 written_pages[page_start] = 1;
517 } 527 }
518 page_start++; 528 ++page_start;
519 } 529 }
520 } 530 }
521 531
@@ -531,7 +541,7 @@ private:
531 written_pages.erase(it); 541 written_pages.erase(it);
532 } 542 }
533 } 543 }
534 page_start++; 544 ++page_start;
535 } 545 }
536 } 546 }
537 547
@@ -542,14 +552,14 @@ private:
542 if (written_pages.count(page_start) > 0) { 552 if (written_pages.count(page_start) > 0) {
543 return true; 553 return true;
544 } 554 }
545 page_start++; 555 ++page_start;
546 } 556 }
547 return false; 557 return false;
548 } 558 }
549 559
550 void MarkForAsyncFlush(MapInterval& map) { 560 void MarkForAsyncFlush(MapInterval* map) {
551 if (!uncommitted_flushes) { 561 if (!uncommitted_flushes) {
552 uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>(); 562 uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
553 } 563 }
554 uncommitted_flushes->insert(map); 564 uncommitted_flushes->insert(map);
555 } 565 }
@@ -566,10 +576,9 @@ private:
566 u64 buffer_offset = 0; 576 u64 buffer_offset = 0;
567 u64 buffer_offset_base = 0; 577 u64 buffer_offset_base = 0;
568 578
569 using IntervalSet = boost::icl::interval_set<VAddr>; 579 MapIntervalAllocator mapped_addresses_allocator;
570 using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; 580 boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
571 using IntervalType = typename IntervalCache::interval_type; 581 mapped_addresses;
572 IntervalCache mapped_addresses;
573 582
574 static constexpr u64 write_page_bit = 11; 583 static constexpr u64 write_page_bit = 11;
575 std::unordered_map<u64, u32> written_pages; 584 std::unordered_map<u64, u32> written_pages;
@@ -583,10 +592,10 @@ private:
583 u64 modified_ticks = 0; 592 u64 modified_ticks = 0;
584 593
585 std::vector<u8> staging_buffer; 594 std::vector<u8> staging_buffer;
586 std::list<MapInterval> marked_for_unregister; 595 std::list<MapInterval*> marked_for_unregister;
587 596
588 std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{}; 597 std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
589 std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes; 598 std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;
590 599
591 std::recursive_mutex mutex; 600 std::recursive_mutex mutex;
592}; 601};
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
new file mode 100644
index 000000000..62587e18a
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.cpp
@@ -0,0 +1,33 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <cstddef>
8#include <memory>
9
10#include "video_core/buffer_cache/map_interval.h"
11
12namespace VideoCommon {
13
14MapIntervalAllocator::MapIntervalAllocator() {
15 FillFreeList(first_chunk);
16}
17
18MapIntervalAllocator::~MapIntervalAllocator() = default;
19
20void MapIntervalAllocator::AllocateNewChunk() {
21 *new_chunk = std::make_unique<Chunk>();
22 FillFreeList(**new_chunk);
23 new_chunk = &(*new_chunk)->next;
24}
25
26void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
27 const std::size_t old_size = free_list.size();
28 free_list.resize(old_size + chunk.data.size());
29 std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
30 [](MapInterval& interval) { return &interval; });
31}
32
33} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 29d8b26f3..fe0bcd1d8 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -4,104 +4,89 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <array>
8#include <cstddef>
9#include <memory>
10#include <vector>
11
12#include <boost/intrusive/set_hook.hpp>
13
7#include "common/common_types.h" 14#include "common/common_types.h"
8#include "video_core/gpu.h" 15#include "video_core/gpu.h"
9 16
10namespace VideoCommon { 17namespace VideoCommon {
11 18
12class MapIntervalBase { 19struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
13public: 20 MapInterval() = default;
14 MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
15 : start{start}, end{end}, gpu_addr{gpu_addr} {}
16
17 void SetCpuAddress(VAddr new_cpu_addr) {
18 cpu_addr = new_cpu_addr;
19 }
20
21 VAddr GetCpuAddress() const {
22 return cpu_addr;
23 }
24
25 GPUVAddr GetGpuAddress() const {
26 return gpu_addr;
27 }
28
29 bool IsInside(const VAddr other_start, const VAddr other_end) const {
30 return (start <= other_start && other_end <= end);
31 }
32
33 bool operator==(const MapIntervalBase& rhs) const {
34 return std::tie(start, end) == std::tie(rhs.start, rhs.end);
35 }
36
37 bool operator!=(const MapIntervalBase& rhs) const {
38 return !operator==(rhs);
39 }
40 21
41 void MarkAsRegistered(const bool registered) { 22 /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}
42 is_registered = registered;
43 }
44 23
45 bool IsRegistered() const { 24 explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
46 return is_registered; 25 : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}
47 }
48 26
49 void SetMemoryMarked(bool is_memory_marked_) { 27 bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
50 is_memory_marked = is_memory_marked_; 28 return start <= other_start && other_end <= end;
51 } 29 }
52 30
53 bool IsMemoryMarked() const { 31 bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
54 return is_memory_marked; 32 return start < other_end && other_start < end;
55 } 33 }
56 34
57 void SetSyncPending(bool is_sync_pending_) { 35 void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
58 is_sync_pending = is_sync_pending_; 36 is_modified = is_modified_;
59 } 37 ticks = ticks_;
38 }
39
40 boost::intrusive::set_member_hook<> member_hook_;
41 VAddr start = 0;
42 VAddr end = 0;
43 GPUVAddr gpu_addr = 0;
44 u64 ticks = 0;
45 bool is_written = false;
46 bool is_modified = false;
47 bool is_registered = false;
48 bool is_memory_marked = false;
49 bool is_sync_pending = false;
50};
60 51
61 bool IsSyncPending() const { 52struct MapIntervalCompare {
62 return is_sync_pending; 53 constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
54 return lhs.start < rhs.start;
63 } 55 }
56};
64 57
65 VAddr GetStart() const { 58class MapIntervalAllocator {
66 return start; 59public:
67 } 60 MapIntervalAllocator();
61 ~MapIntervalAllocator();
68 62
69 VAddr GetEnd() const { 63 MapInterval* Allocate() {
70 return end; 64 if (free_list.empty()) {
65 AllocateNewChunk();
66 }
67 MapInterval* const interval = free_list.back();
68 free_list.pop_back();
69 return interval;
71 } 70 }
72 71
73 void MarkAsModified(const bool is_modified_, const u64 tick) { 72 void Release(MapInterval* interval) {
74 is_modified = is_modified_; 73 free_list.push_back(interval);
75 ticks = tick;
76 } 74 }
77 75
78 bool IsModified() const { 76private:
79 return is_modified; 77 struct Chunk {
80 } 78 std::unique_ptr<Chunk> next;
79 std::array<MapInterval, 0x8000> data;
80 };
81 81
82 u64 GetModificationTick() const { 82 void AllocateNewChunk();
83 return ticks;
84 }
85 83
86 void MarkAsWritten(const bool is_written_) { 84 void FillFreeList(Chunk& chunk);
87 is_written = is_written_;
88 }
89 85
90 bool IsWritten() const { 86 std::vector<MapInterval*> free_list;
91 return is_written; 87 std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;
92 }
93 88
94private: 89 Chunk first_chunk;
95 VAddr start;
96 VAddr end;
97 GPUVAddr gpu_addr;
98 VAddr cpu_addr{};
99 bool is_written{};
100 bool is_modified{};
101 bool is_registered{};
102 bool is_memory_marked{};
103 bool is_sync_pending{};
104 u64 ticks{};
105}; 90};
106 91
107} // namespace VideoCommon 92} // namespace VideoCommon
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index bdc023d54..f2f96ac33 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -54,9 +54,7 @@ bool DmaPusher::Step() {
54 return true; 54 return true;
55 }); 55 });
56 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; 56 const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
57 GPUVAddr dma_get = command_list_header.addr; 57 const GPUVAddr dma_get = command_list_header.addr;
58 GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);
59 bool non_main = command_list_header.is_non_main;
60 58
61 if (dma_pushbuffer_subindex >= command_list.size()) { 59 if (dma_pushbuffer_subindex >= command_list.size()) {
62 // We've gone through the current list, remove it from the queue 60 // We've gone through the current list, remove it from the queue
@@ -133,11 +131,6 @@ bool DmaPusher::Step() {
133 index++; 131 index++;
134 } 132 }
135 133
136 if (!non_main) {
137 // TODO (degasus): This is dead code, as dma_mget is never read.
138 dma_mget = dma_put;
139 }
140
141 return true; 134 return true;
142} 135}
143 136
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index e8b714e94..efa90d170 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -102,7 +102,6 @@ private:
102 DmaState dma_state{}; 102 DmaState dma_state{};
103 bool dma_increment_once{}; 103 bool dma_increment_once{};
104 104
105 GPUVAddr dma_mget{}; ///< main pushbuffer last read address
106 bool ib_enable{true}; ///< IB mode enabled 105 bool ib_enable{true}; ///< IB mode enabled
107 106
108 std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{}; 107 std::array<Tegra::Engines::EngineInterface*, max_subchannels> subchannels{};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 024c9e43b..004f6b261 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -457,8 +457,9 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
457 457
458void Maxwell3D::ProcessQueryGet() { 458void Maxwell3D::ProcessQueryGet() {
459 // TODO(Subv): Support the other query units. 459 // TODO(Subv): Support the other query units.
460 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 460 if (regs.query.query_get.unit != Regs::QueryUnit::Crop) {
461 "Units other than CROP are unimplemented"); 461 LOG_DEBUG(HW_GPU, "Units other than CROP are unimplemented");
462 }
462 463
463 switch (regs.query.query_get.operation) { 464 switch (regs.query.query_get.operation) {
464 case Regs::QueryOperation::Release: 465 case Regs::QueryOperation::Release:
@@ -534,8 +535,8 @@ void Maxwell3D::ProcessCounterReset() {
534 rasterizer.ResetCounter(QueryType::SamplesPassed); 535 rasterizer.ResetCounter(QueryType::SamplesPassed);
535 break; 536 break;
536 default: 537 default:
537 LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}", 538 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
538 static_cast<int>(regs.counter_reset)); 539 static_cast<int>(regs.counter_reset));
539 break; 540 break;
540 } 541 }
541} 542}
@@ -592,8 +593,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
592 system.GPU().GetTicks()); 593 system.GPU().GetTicks());
593 return {}; 594 return {};
594 default: 595 default:
595 UNIMPLEMENTED_MSG("Unimplemented query select type {}", 596 LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
596 static_cast<u32>(regs.query.query_get.select.Value())); 597 static_cast<u32>(regs.query.query_get.select.Value()));
597 return 1; 598 return 1;
598 } 599 }
599} 600}
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 8dae754d4..e7cb87589 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -168,18 +168,22 @@ enum class Pred : u64 {
168}; 168};
169 169
170enum class PredCondition : u64 { 170enum class PredCondition : u64 {
171 LessThan = 1, 171 F = 0, // Always false
172 Equal = 2, 172 LT = 1, // Ordered less than
173 LessEqual = 3, 173 EQ = 2, // Ordered equal
174 GreaterThan = 4, 174 LE = 3, // Ordered less than or equal
175 NotEqual = 5, 175 GT = 4, // Ordered greater than
176 GreaterEqual = 6, 176 NE = 5, // Ordered not equal
177 LessThanWithNan = 9, 177 GE = 6, // Ordered greater than or equal
178 LessEqualWithNan = 11, 178 NUM = 7, // Ordered
179 GreaterThanWithNan = 12, 179 NAN_ = 8, // Unordered
180 NotEqualWithNan = 13, 180 LTU = 9, // Unordered less than
181 GreaterEqualWithNan = 14, 181 EQU = 10, // Unordered equal
182 // TODO(Subv): Other condition types 182 LEU = 11, // Unordered less than or equal
183 GTU = 12, // Unordered greater than
184 NEU = 13, // Unordered not equal
185 GEU = 14, // Unordered greater than or equal
186 T = 15, // Always true
183}; 187};
184 188
185enum class PredOperation : u64 { 189enum class PredOperation : u64 {
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index 22987751e..096ee337c 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -56,9 +56,27 @@ public:
56 last_modified_ticks = cache.GetModifiedTicks(); 56 last_modified_ticks = cache.GetModifiedTicks();
57 } 57 }
58 58
59 void SetMemoryMarked(bool is_memory_marked_) {
60 is_memory_marked = is_memory_marked_;
61 }
62
63 bool IsMemoryMarked() const {
64 return is_memory_marked;
65 }
66
67 void SetSyncPending(bool is_sync_pending_) {
68 is_sync_pending = is_sync_pending_;
69 }
70
71 bool IsSyncPending() const {
72 return is_sync_pending;
73 }
74
59private: 75private:
60 bool is_registered{}; ///< Whether the object is currently registered with the cache 76 bool is_registered{}; ///< Whether the object is currently registered with the cache
61 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 77 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
78 bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
79 bool is_sync_pending{}; ///< Whether the object is pending deletion.
62 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 80 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
63 VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space 81 VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
64}; 82};
@@ -94,6 +112,30 @@ public:
94 } 112 }
95 } 113 }
96 114
115 void OnCPUWrite(VAddr addr, std::size_t size) {
116 std::lock_guard lock{mutex};
117
118 for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
119 if (object->IsRegistered()) {
120 UnmarkMemory(object);
121 object->SetSyncPending(true);
122 marked_for_unregister.emplace_back(object);
123 }
124 }
125 }
126
127 void SyncGuestHost() {
128 std::lock_guard lock{mutex};
129
130 for (const auto& object : marked_for_unregister) {
131 if (object->IsRegistered()) {
132 object->SetSyncPending(false);
133 Unregister(object);
134 }
135 }
136 marked_for_unregister.clear();
137 }
138
97 /// Invalidates everything in the cache 139 /// Invalidates everything in the cache
98 void InvalidateAll() { 140 void InvalidateAll() {
99 std::lock_guard lock{mutex}; 141 std::lock_guard lock{mutex};
@@ -120,19 +162,32 @@ protected:
120 interval_cache.add({GetInterval(object), ObjectSet{object}}); 162 interval_cache.add({GetInterval(object), ObjectSet{object}});
121 map_cache.insert({object->GetCpuAddr(), object}); 163 map_cache.insert({object->GetCpuAddr(), object});
122 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); 164 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
165 object->SetMemoryMarked(true);
123 } 166 }
124 167
125 /// Unregisters an object from the cache 168 /// Unregisters an object from the cache
126 virtual void Unregister(const T& object) { 169 virtual void Unregister(const T& object) {
127 std::lock_guard lock{mutex}; 170 std::lock_guard lock{mutex};
128 171
172 UnmarkMemory(object);
129 object->SetIsRegistered(false); 173 object->SetIsRegistered(false);
130 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); 174 if (object->IsSyncPending()) {
175 marked_for_unregister.remove(object);
176 object->SetSyncPending(false);
177 }
131 const VAddr addr = object->GetCpuAddr(); 178 const VAddr addr = object->GetCpuAddr();
132 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 179 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
133 map_cache.erase(addr); 180 map_cache.erase(addr);
134 } 181 }
135 182
183 void UnmarkMemory(const T& object) {
184 if (!object->IsMemoryMarked()) {
185 return;
186 }
187 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
188 object->SetMemoryMarked(false);
189 }
190
136 /// Returns a ticks counter used for tracking when cached objects were last modified 191 /// Returns a ticks counter used for tracking when cached objects were last modified
137 u64 GetModifiedTicks() { 192 u64 GetModifiedTicks() {
138 std::lock_guard lock{mutex}; 193 std::lock_guard lock{mutex};
@@ -194,4 +249,5 @@ private:
194 IntervalCache interval_cache; ///< Cache of objects 249 IntervalCache interval_cache; ///< Cache of objects
195 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing 250 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
196 VideoCore::RasterizerInterface& rasterizer; 251 VideoCore::RasterizerInterface& rasterizer;
252 std::list<T> marked_for_unregister;
197}; 253};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index d2cab50bd..9964ea894 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,6 +8,7 @@
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/microprofile.h" 10#include "common/microprofile.h"
11#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
12#include "video_core/rasterizer_interface.h" 13#include "video_core/rasterizer_interface.h"
13#include "video_core/renderer_opengl/gl_buffer_cache.h" 14#include "video_core/renderer_opengl/gl_buffer_cache.h"
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index d83dca25a..e1b245288 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -13,6 +13,7 @@
13 13
14#include "common/logging/log.h" 14#include "common/logging/log.h"
15#include "common/scope_exit.h" 15#include "common/scope_exit.h"
16#include "core/settings.h"
16#include "video_core/renderer_opengl/gl_device.h" 17#include "video_core/renderer_opengl/gl_device.h"
17#include "video_core/renderer_opengl/gl_resource_manager.h" 18#include "video_core/renderer_opengl/gl_resource_manager.h"
18 19
@@ -165,8 +166,6 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
165 166
166 const bool is_nvidia = vendor == "NVIDIA Corporation"; 167 const bool is_nvidia = vendor == "NVIDIA Corporation";
167 const bool is_amd = vendor == "ATI Technologies Inc."; 168 const bool is_amd = vendor == "ATI Technologies Inc.";
168 const bool is_intel = vendor == "Intel";
169 const bool is_intel_proprietary = is_intel && std::strstr(renderer, "Mesa") == nullptr;
170 169
171 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); 170 uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
172 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); 171 shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
@@ -181,12 +180,17 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
181 has_variable_aoffi = TestVariableAoffi(); 180 has_variable_aoffi = TestVariableAoffi();
182 has_component_indexing_bug = is_amd; 181 has_component_indexing_bug = is_amd;
183 has_precise_bug = TestPreciseBug(); 182 has_precise_bug = TestPreciseBug();
184 has_broken_compute = is_intel_proprietary;
185 has_fast_buffer_sub_data = is_nvidia; 183 has_fast_buffer_sub_data = is_nvidia;
184 use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
185 GLAD_GL_NV_compute_program5;
186 186
187 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi); 187 LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
188 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug); 188 LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
189 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug); 189 LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
190
191 if (Settings::values.use_assembly_shaders && !use_assembly_shaders) {
192 LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
193 }
190} 194}
191 195
192Device::Device(std::nullptr_t) { 196Device::Device(std::nullptr_t) {
@@ -199,7 +203,6 @@ Device::Device(std::nullptr_t) {
199 has_image_load_formatted = true; 203 has_image_load_formatted = true;
200 has_variable_aoffi = true; 204 has_variable_aoffi = true;
201 has_component_indexing_bug = false; 205 has_component_indexing_bug = false;
202 has_broken_compute = false;
203 has_precise_bug = false; 206 has_precise_bug = false;
204} 207}
205 208
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index a55050cb5..683ed9002 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -80,14 +80,14 @@ public:
80 return has_precise_bug; 80 return has_precise_bug;
81 } 81 }
82 82
83 bool HasBrokenCompute() const {
84 return has_broken_compute;
85 }
86
87 bool HasFastBufferSubData() const { 83 bool HasFastBufferSubData() const {
88 return has_fast_buffer_sub_data; 84 return has_fast_buffer_sub_data;
89 } 85 }
90 86
87 bool UseAssemblyShaders() const {
88 return use_assembly_shaders;
89 }
90
91private: 91private:
92 static bool TestVariableAoffi(); 92 static bool TestVariableAoffi();
93 static bool TestPreciseBug(); 93 static bool TestPreciseBug();
@@ -105,8 +105,8 @@ private:
105 bool has_variable_aoffi{}; 105 bool has_variable_aoffi{};
106 bool has_component_indexing_bug{}; 106 bool has_component_indexing_bug{};
107 bool has_precise_bug{}; 107 bool has_precise_bug{};
108 bool has_broken_compute{};
109 bool has_fast_buffer_sub_data{}; 108 bool has_fast_buffer_sub_data{};
109 bool use_assembly_shaders{};
110}; 110};
111 111
112} // namespace OpenGL 112} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 99ddcb3f8..ec5421afa 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,6 +4,7 @@
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6 6
7#include "video_core/renderer_opengl/gl_buffer_cache.h"
7#include "video_core/renderer_opengl/gl_fence_manager.h" 8#include "video_core/renderer_opengl/gl_fence_manager.h"
8 9
9namespace OpenGL { 10namespace OpenGL {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 69dcf952f..61cf99b9d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -94,17 +94,30 @@ void oglEnable(GLenum cap, bool state) {
94} // Anonymous namespace 94} // Anonymous namespace
95 95
96RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 96RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
97 ScreenInfo& info, GLShader::ProgramManager& program_manager, 97 const Device& device, ScreenInfo& info,
98 StateTracker& state_tracker) 98 ProgramManager& program_manager, StateTracker& state_tracker)
99 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, 99 : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
100 state_tracker},
100 shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, 101 shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
101 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, 102 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
102 fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, 103 fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
103 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { 104 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
104 CheckExtensions(); 105 CheckExtensions();
106
107 if (device.UseAssemblyShaders()) {
108 glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
109 for (const GLuint cbuf : staging_cbufs) {
110 glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
111 nullptr, 0);
112 }
113 }
105} 114}
106 115
107RasterizerOpenGL::~RasterizerOpenGL() {} 116RasterizerOpenGL::~RasterizerOpenGL() {
117 if (device.UseAssemblyShaders()) {
118 glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
119 }
120}
108 121
109void RasterizerOpenGL::CheckExtensions() { 122void RasterizerOpenGL::CheckExtensions() {
110 if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) { 123 if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
@@ -230,6 +243,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
230void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { 243void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
231 MICROPROFILE_SCOPE(OpenGL_Shader); 244 MICROPROFILE_SCOPE(OpenGL_Shader);
232 auto& gpu = system.GPU().Maxwell3D(); 245 auto& gpu = system.GPU().Maxwell3D();
246 std::size_t num_ssbos = 0;
233 u32 clip_distances = 0; 247 u32 clip_distances = 0;
234 248
235 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 249 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -261,6 +275,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
261 275
262 Shader shader{shader_cache.GetStageProgram(program)}; 276 Shader shader{shader_cache.GetStageProgram(program)};
263 277
278 if (device.UseAssemblyShaders()) {
279 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
280 // all stages share the same bindings.
281 const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
282 ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
283 num_ssbos += num_stage_ssbos;
284 }
285
264 // Stage indices are 0 - 5 286 // Stage indices are 0 - 5
265 const std::size_t stage = index == 0 ? 0 : index - 1; 287 const std::size_t stage = index == 0 ? 0 : index - 1;
266 SetupDrawConstBuffers(stage, shader); 288 SetupDrawConstBuffers(stage, shader);
@@ -526,6 +548,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
526 SyncFramebufferSRGB(); 548 SyncFramebufferSRGB();
527 549
528 buffer_cache.Acquire(); 550 buffer_cache.Acquire();
551 current_cbuf = 0;
529 552
530 std::size_t buffer_size = CalculateVertexArraysSize(); 553 std::size_t buffer_size = CalculateVertexArraysSize();
531 554
@@ -535,9 +558,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
535 } 558 }
536 559
537 // Uniform space for the 5 shader stages 560 // Uniform space for the 5 shader stages
538 buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + 561 buffer_size =
539 (sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) * 562 Common::AlignUp<std::size_t>(buffer_size, 4) +
540 Maxwell::MaxShaderStage; 563 (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
541 564
542 // Add space for at least 18 constant buffers 565 // Add space for at least 18 constant buffers
543 buffer_size += Maxwell::MaxConstBuffers * 566 buffer_size += Maxwell::MaxConstBuffers *
@@ -558,12 +581,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
558 } 581 }
559 582
560 // Setup emulation uniform buffer. 583 // Setup emulation uniform buffer.
561 GLShader::MaxwellUniformData ubo; 584 if (!device.UseAssemblyShaders()) {
562 ubo.SetFromRegs(gpu); 585 MaxwellUniformData ubo;
563 const auto [buffer, offset] = 586 ubo.SetFromRegs(gpu);
564 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); 587 const auto [buffer, offset] =
565 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, 588 buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
566 static_cast<GLsizeiptr>(sizeof(ubo))); 589 glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
590 static_cast<GLsizeiptr>(sizeof(ubo)));
591 }
567 592
568 // Setup shaders and their used resources. 593 // Setup shaders and their used resources.
569 texture_cache.GuardSamplers(true); 594 texture_cache.GuardSamplers(true);
@@ -630,16 +655,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
630} 655}
631 656
632void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 657void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
633 if (device.HasBrokenCompute()) {
634 return;
635 }
636
637 buffer_cache.Acquire(); 658 buffer_cache.Acquire();
659 current_cbuf = 0;
638 660
639 auto kernel = shader_cache.GetComputeKernel(code_addr); 661 auto kernel = shader_cache.GetComputeKernel(code_addr);
640 SetupComputeTextures(kernel); 662 SetupComputeTextures(kernel);
641 SetupComputeImages(kernel); 663 SetupComputeImages(kernel);
642 program_manager.BindComputeShader(kernel->GetHandle());
643 664
644 const std::size_t buffer_size = 665 const std::size_t buffer_size =
645 Tegra::Engines::KeplerCompute::NumConstBuffers * 666 Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -652,6 +673,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
652 buffer_cache.Unmap(); 673 buffer_cache.Unmap();
653 674
654 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 675 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
676 program_manager.BindCompute(kernel->GetHandle());
655 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); 677 glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
656 ++num_queued_commands; 678 ++num_queued_commands;
657} 679}
@@ -701,15 +723,15 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
701 return; 723 return;
702 } 724 }
703 texture_cache.OnCPUWrite(addr, size); 725 texture_cache.OnCPUWrite(addr, size);
704 shader_cache.InvalidateRegion(addr, size); 726 shader_cache.OnCPUWrite(addr, size);
705 buffer_cache.OnCPUWrite(addr, size); 727 buffer_cache.OnCPUWrite(addr, size);
706 query_cache.InvalidateRegion(addr, size);
707} 728}
708 729
709void RasterizerOpenGL::SyncGuestHost() { 730void RasterizerOpenGL::SyncGuestHost() {
710 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 731 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
711 texture_cache.SyncGuestHost(); 732 texture_cache.SyncGuestHost();
712 buffer_cache.SyncGuestHost(); 733 buffer_cache.SyncGuestHost();
734 shader_cache.SyncGuestHost();
713} 735}
714 736
715void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { 737void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
@@ -812,14 +834,20 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
812} 834}
813 835
814void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { 836void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
837 static constexpr std::array PARAMETER_LUT = {
838 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
839 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
840 GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
841
815 MICROPROFILE_SCOPE(OpenGL_UBO); 842 MICROPROFILE_SCOPE(OpenGL_UBO);
816 const auto& stages = system.GPU().Maxwell3D().state.shader_stages; 843 const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
817 const auto& shader_stage = stages[stage_index]; 844 const auto& shader_stage = stages[stage_index];
818 845
819 u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; 846 u32 binding =
847 device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
820 for (const auto& entry : shader->GetEntries().const_buffers) { 848 for (const auto& entry : shader->GetEntries().const_buffers) {
821 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; 849 const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
822 SetupConstBuffer(binding++, buffer, entry); 850 SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
823 } 851 }
824} 852}
825 853
@@ -835,16 +863,21 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
835 buffer.address = config.Address(); 863 buffer.address = config.Address();
836 buffer.size = config.size; 864 buffer.size = config.size;
837 buffer.enabled = mask[entry.GetIndex()]; 865 buffer.enabled = mask[entry.GetIndex()];
838 SetupConstBuffer(binding++, buffer, entry); 866 SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
839 } 867 }
840} 868}
841 869
842void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 870void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
871 const Tegra::Engines::ConstBufferInfo& buffer,
843 const ConstBufferEntry& entry) { 872 const ConstBufferEntry& entry) {
844 if (!buffer.enabled) { 873 if (!buffer.enabled) {
845 // Set values to zero to unbind buffers 874 // Set values to zero to unbind buffers
846 glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, 875 if (device.UseAssemblyShaders()) {
847 sizeof(float)); 876 glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
877 } else {
878 glBindBufferRange(GL_UNIFORM_BUFFER, binding,
879 buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
880 }
848 return; 881 return;
849 } 882 }
850 883
@@ -853,9 +886,19 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
853 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); 886 const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
854 887
855 const auto alignment = device.GetUniformBufferAlignment(); 888 const auto alignment = device.GetUniformBufferAlignment();
856 const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, 889 auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
857 device.HasFastBufferSubData()); 890 device.HasFastBufferSubData());
858 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); 891 if (!device.UseAssemblyShaders()) {
892 glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
893 return;
894 }
895 if (offset != 0) {
896 const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
897 glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
898 cbuf = staging_cbuf;
899 offset = 0;
900 }
901 glBindBufferRangeNV(stage, binding, cbuf, offset, size);
859} 902}
860 903
861void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { 904void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@@ -863,7 +906,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
863 auto& memory_manager{gpu.MemoryManager()}; 906 auto& memory_manager{gpu.MemoryManager()};
864 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 907 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
865 908
866 u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; 909 u32 binding =
910 device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
867 for (const auto& entry : shader->GetEntries().global_memory_entries) { 911 for (const auto& entry : shader->GetEntries().global_memory_entries) {
868 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; 912 const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
869 const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; 913 const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
@@ -929,16 +973,12 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
929 glBindTextureUnit(binding, 0); 973 glBindTextureUnit(binding, 0);
930 return; 974 return;
931 } 975 }
932 glBindTextureUnit(binding, view->GetTexture()); 976 const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
933 977 texture.tic.z_source, texture.tic.w_source);
934 if (view->GetSurfaceParams().IsBuffer()) { 978 glBindTextureUnit(binding, handle);
935 return; 979 if (!view->GetSurfaceParams().IsBuffer()) {
980 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
936 } 981 }
937 // Apply swizzle to textures that are not buffers.
938 view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
939 texture.tic.w_source);
940
941 glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
942} 982}
943 983
944void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { 984void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
@@ -967,14 +1007,11 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
967 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); 1007 glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
968 return; 1008 return;
969 } 1009 }
970 if (!tic.IsBuffer()) {
971 view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
972 }
973 if (entry.is_written) { 1010 if (entry.is_written) {
974 view->MarkAsModified(texture_cache.Tick()); 1011 view->MarkAsModified(texture_cache.Tick());
975 } 1012 }
976 glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE, 1013 const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
977 view->GetFormat()); 1014 glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
978} 1015}
979 1016
980void RasterizerOpenGL::SyncViewport() { 1017void RasterizerOpenGL::SyncViewport() {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index b94c65907..87f7fe159 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -56,8 +56,8 @@ struct DrawParameters;
56class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { 56class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
57public: 57public:
58 explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, 58 explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
59 ScreenInfo& info, GLShader::ProgramManager& program_manager, 59 const Device& device, ScreenInfo& info,
60 StateTracker& state_tracker); 60 ProgramManager& program_manager, StateTracker& state_tracker);
61 ~RasterizerOpenGL() override; 61 ~RasterizerOpenGL() override;
62 62
63 void Draw(bool is_indexed, bool is_instanced) override; 63 void Draw(bool is_indexed, bool is_instanced) override;
@@ -106,7 +106,7 @@ private:
106 void SetupComputeConstBuffers(const Shader& kernel); 106 void SetupComputeConstBuffers(const Shader& kernel);
107 107
108 /// Configures a constant buffer. 108 /// Configures a constant buffer.
109 void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
110 const ConstBufferEntry& entry); 110 const ConstBufferEntry& entry);
111 111
112 /// Configures the current global memory entries to use for the draw command. 112 /// Configures the current global memory entries to use for the draw command.
@@ -224,7 +224,7 @@ private:
224 224
225 void SetupShaders(GLenum primitive_mode); 225 void SetupShaders(GLenum primitive_mode);
226 226
227 const Device device; 227 const Device& device;
228 228
229 TextureCacheOpenGL texture_cache; 229 TextureCacheOpenGL texture_cache;
230 ShaderCacheOpenGL shader_cache; 230 ShaderCacheOpenGL shader_cache;
@@ -236,7 +236,7 @@ private:
236 236
237 Core::System& system; 237 Core::System& system;
238 ScreenInfo& screen_info; 238 ScreenInfo& screen_info;
239 GLShader::ProgramManager& program_manager; 239 ProgramManager& program_manager;
240 StateTracker& state_tracker; 240 StateTracker& state_tracker;
241 241
242 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 242 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
@@ -248,6 +248,12 @@ private:
248 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> 248 std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
249 enabled_transform_feedback_buffers; 249 enabled_transform_feedback_buffers;
250 250
251 static constexpr std::size_t NUM_CONSTANT_BUFFERS =
252 Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
253 Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
254 std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
255 std::size_t current_cbuf = 0;
256
251 /// Number of commands queued to the OpenGL driver. Reseted on flush. 257 /// Number of commands queued to the OpenGL driver. Reseted on flush.
252 std::size_t num_queued_commands = 0; 258 std::size_t num_queued_commands = 0;
253 259
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index 97803d480..a787e27d2 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -125,6 +125,15 @@ void OGLProgram::Release() {
125 handle = 0; 125 handle = 0;
126} 126}
127 127
128void OGLAssemblyProgram::Release() {
129 if (handle == 0) {
130 return;
131 }
132 MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
133 glDeleteProgramsARB(1, &handle);
134 handle = 0;
135}
136
128void OGLPipeline::Create() { 137void OGLPipeline::Create() {
129 if (handle != 0) 138 if (handle != 0)
130 return; 139 return;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index de93f4212..f8b322227 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -167,6 +167,22 @@ public:
167 GLuint handle = 0; 167 GLuint handle = 0;
168}; 168};
169 169
170class OGLAssemblyProgram : private NonCopyable {
171public:
172 OGLAssemblyProgram() = default;
173
174 OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
175
176 ~OGLAssemblyProgram() {
177 Release();
178 }
179
180 /// Deletes the internal OpenGL resource
181 void Release();
182
183 GLuint handle = 0;
184};
185
170class OGLPipeline : private NonCopyable { 186class OGLPipeline : private NonCopyable {
171public: 187public:
172 OGLPipeline() = default; 188 OGLPipeline() = default;
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 9759a7078..4cd0f36cf 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -97,6 +97,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
97 return {}; 97 return {};
98} 98}
99 99
100constexpr GLenum AssemblyEnum(ShaderType shader_type) {
101 switch (shader_type) {
102 case ShaderType::Vertex:
103 return GL_VERTEX_PROGRAM_NV;
104 case ShaderType::TesselationControl:
105 return GL_TESS_CONTROL_PROGRAM_NV;
106 case ShaderType::TesselationEval:
107 return GL_TESS_EVALUATION_PROGRAM_NV;
108 case ShaderType::Geometry:
109 return GL_GEOMETRY_PROGRAM_NV;
110 case ShaderType::Fragment:
111 return GL_FRAGMENT_PROGRAM_NV;
112 case ShaderType::Compute:
113 return GL_COMPUTE_PROGRAM_NV;
114 }
115 return {};
116}
117
100std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { 118std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
101 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); 119 return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
102} 120}
@@ -120,18 +138,43 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
120 return registry; 138 return registry;
121} 139}
122 140
123std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, 141ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
124 u64 unique_identifier, const ShaderIR& ir, 142 const ShaderIR& ir, const Registry& registry,
125 const Registry& registry, bool hint_retrievable = false) { 143 bool hint_retrievable = false) {
126 const std::string shader_id = MakeShaderID(unique_identifier, shader_type); 144 const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
127 LOG_INFO(Render_OpenGL, "{}", shader_id); 145 LOG_INFO(Render_OpenGL, "{}", shader_id);
128 146
129 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); 147 auto program = std::make_shared<ProgramHandle>();
130 OGLShader shader; 148
131 shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); 149 if (device.UseAssemblyShaders()) {
150 const std::string arb = "Not implemented";
151
152 GLuint& arb_prog = program->assembly_program.handle;
153
154// Commented out functions signal OpenGL errors but are compatible with apitrace.
155// Use them only to capture and replay on apitrace.
156#if 0
157 glGenProgramsNV(1, &arb_prog);
158 glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
159 reinterpret_cast<const GLubyte*>(arb.data()));
160#else
161 glGenProgramsARB(1, &arb_prog);
162 glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
163 static_cast<GLsizei>(arb.size()), arb.data());
164#endif
165 const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
166 if (err && *err) {
167 LOG_CRITICAL(Render_OpenGL, "{}", err);
168 LOG_INFO(Render_OpenGL, "\n{}", arb);
169 }
170 } else {
171 const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
172 OGLShader shader;
173 shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
174
175 program->source_program.Create(true, hint_retrievable, shader.handle);
176 }
132 177
133 auto program = std::make_shared<OGLProgram>();
134 program->Create(true, hint_retrievable, shader.handle);
135 return program; 178 return program;
136} 179}
137 180
@@ -153,15 +196,22 @@ std::unordered_set<GLenum> GetSupportedFormats() {
153 196
154CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 197CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
155 std::shared_ptr<VideoCommon::Shader::Registry> registry, 198 std::shared_ptr<VideoCommon::Shader::Registry> registry,
156 ShaderEntries entries, std::shared_ptr<OGLProgram> program) 199 ShaderEntries entries, ProgramSharedPtr program_)
157 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, 200 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
158 size_in_bytes{size_in_bytes}, program{std::move(program)} {} 201 size_in_bytes{size_in_bytes}, program{std::move(program_)} {
202 // Assign either the assembly program or source program. We can't have both.
203 handle = program->assembly_program.handle;
204 if (handle == 0) {
205 handle = program->source_program.handle;
206 }
207 ASSERT(handle != 0);
208}
159 209
160CachedShader::~CachedShader() = default; 210CachedShader::~CachedShader() = default;
161 211
162GLuint CachedShader::GetHandle() const { 212GLuint CachedShader::GetHandle() const {
163 DEBUG_ASSERT(registry->IsConsistent()); 213 DEBUG_ASSERT(registry->IsConsistent());
164 return program->handle; 214 return handle;
165} 215}
166 216
167Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 217Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
@@ -239,7 +289,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
239 return; 289 return;
240 } 290 }
241 291
242 const std::vector gl_cache = disk_cache.LoadPrecompiled(); 292 std::vector<ShaderDiskCachePrecompiled> gl_cache;
293 if (!device.UseAssemblyShaders()) {
294 // Only load precompiled cache when we are not using assembly shaders
295 gl_cache = disk_cache.LoadPrecompiled();
296 }
243 const auto supported_formats = GetSupportedFormats(); 297 const auto supported_formats = GetSupportedFormats();
244 298
245 // Track if precompiled cache was altered during loading to know if we have to 299 // Track if precompiled cache was altered during loading to know if we have to
@@ -278,7 +332,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
278 auto registry = MakeRegistry(entry); 332 auto registry = MakeRegistry(entry);
279 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); 333 const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
280 334
281 std::shared_ptr<OGLProgram> program; 335 ProgramSharedPtr program;
282 if (precompiled_entry) { 336 if (precompiled_entry) {
283 // If the shader is precompiled, attempt to load it with 337 // If the shader is precompiled, attempt to load it with
284 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); 338 program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
@@ -332,6 +386,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
332 return; 386 return;
333 } 387 }
334 388
389 if (device.UseAssemblyShaders()) {
390 // Don't store precompiled binaries for assembly shaders.
391 return;
392 }
393
335 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw 394 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
336 // before precompiling them 395 // before precompiling them
337 396
@@ -339,7 +398,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
339 const u64 id = (*transferable)[i].unique_identifier; 398 const u64 id = (*transferable)[i].unique_identifier;
340 const auto it = find_precompiled(id); 399 const auto it = find_precompiled(id);
341 if (it == gl_cache.end()) { 400 if (it == gl_cache.end()) {
342 const GLuint program = runtime_cache.at(id).program->handle; 401 const GLuint program = runtime_cache.at(id).program->source_program.handle;
343 disk_cache.SavePrecompiled(id, program); 402 disk_cache.SavePrecompiled(id, program);
344 precompiled_cache_altered = true; 403 precompiled_cache_altered = true;
345 } 404 }
@@ -350,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
350 } 409 }
351} 410}
352 411
353std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( 412ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
354 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, 413 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
355 const std::unordered_set<GLenum>& supported_formats) { 414 const std::unordered_set<GLenum>& supported_formats) {
356 if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { 415 if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
@@ -358,15 +417,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
358 return {}; 417 return {};
359 } 418 }
360 419
361 auto program = std::make_shared<OGLProgram>(); 420 auto program = std::make_shared<ProgramHandle>();
362 program->handle = glCreateProgram(); 421 GLuint& handle = program->source_program.handle;
363 glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); 422 handle = glCreateProgram();
364 glProgramBinary(program->handle, precompiled_entry.binary_format, 423 glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
365 precompiled_entry.binary.data(), 424 glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
366 static_cast<GLsizei>(precompiled_entry.binary.size())); 425 static_cast<GLsizei>(precompiled_entry.binary.size()));
367 426
368 GLint link_status; 427 GLint link_status;
369 glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); 428 glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
370 if (link_status == GL_FALSE) { 429 if (link_status == GL_FALSE) {
371 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); 430 LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
372 return {}; 431 return {};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 91690b470..b2ae8d7f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -43,8 +43,14 @@ struct UnspecializedShader;
43using Shader = std::shared_ptr<CachedShader>; 43using Shader = std::shared_ptr<CachedShader>;
44using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45 45
46struct ProgramHandle {
47 OGLProgram source_program;
48 OGLAssemblyProgram assembly_program;
49};
50using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
51
46struct PrecompiledShader { 52struct PrecompiledShader {
47 std::shared_ptr<OGLProgram> program; 53 ProgramSharedPtr program;
48 std::shared_ptr<VideoCommon::Shader::Registry> registry; 54 std::shared_ptr<VideoCommon::Shader::Registry> registry;
49 ShaderEntries entries; 55 ShaderEntries entries;
50}; 56};
@@ -87,12 +93,13 @@ public:
87private: 93private:
88 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 94 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
89 std::shared_ptr<VideoCommon::Shader::Registry> registry, 95 std::shared_ptr<VideoCommon::Shader::Registry> registry,
90 ShaderEntries entries, std::shared_ptr<OGLProgram> program); 96 ShaderEntries entries, ProgramSharedPtr program);
91 97
92 std::shared_ptr<VideoCommon::Shader::Registry> registry; 98 std::shared_ptr<VideoCommon::Shader::Registry> registry;
93 ShaderEntries entries; 99 ShaderEntries entries;
94 std::size_t size_in_bytes = 0; 100 std::size_t size_in_bytes = 0;
95 std::shared_ptr<OGLProgram> program; 101 ProgramSharedPtr program;
102 GLuint handle = 0;
96}; 103};
97 104
98class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 105class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -115,7 +122,7 @@ protected:
115 void FlushObjectInner(const Shader& object) override {} 122 void FlushObjectInner(const Shader& object) override {}
116 123
117private: 124private:
118 std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( 125 ProgramSharedPtr GeneratePrecompiledProgram(
119 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, 126 const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
120 const std::unordered_set<GLenum>& supported_formats); 127 const std::unordered_set<GLenum>& supported_formats);
121 128
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 99fd4ae2c..9cb115959 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1538,7 +1538,9 @@ private:
1538 Expression target; 1538 Expression target;
1539 if (const auto gpr = std::get_if<GprNode>(&*dest)) { 1539 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1540 if (gpr->GetIndex() == Register::ZeroIndex) { 1540 if (gpr->GetIndex() == Register::ZeroIndex) {
1541 // Writing to Register::ZeroIndex is a no op 1541 // Writing to Register::ZeroIndex is a no op but we still have to visit the source
1542 // as it might have side effects.
1543 code.AddLine("{};", Visit(src).GetCode());
1542 return {}; 1544 return {};
1543 } 1545 }
1544 target = {GetRegister(gpr->GetIndex()), Type::Float}; 1546 target = {GetRegister(gpr->GetIndex()), Type::Float};
@@ -1840,34 +1842,40 @@ private:
1840 Type::HalfFloat}; 1842 Type::HalfFloat};
1841 } 1843 }
1842 1844
1843 template <Type type> 1845 template <const std::string_view& op, Type type, bool unordered = false>
1844 Expression LogicalLessThan(Operation operation) { 1846 Expression Comparison(Operation operation) {
1845 return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); 1847 static_assert(!unordered || type == Type::Float);
1846 }
1847 1848
1848 template <Type type> 1849 const Expression expr = GenerateBinaryInfix(operation, op, Type::Bool, type, type);
1849 Expression LogicalEqual(Operation operation) {
1850 return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
1851 }
1852 1850
1853 template <Type type> 1851 if constexpr (op.compare("!=") == 0 && type == Type::Float && !unordered) {
1854 Expression LogicalLessEqual(Operation operation) { 1852 // GLSL's operator!=(float, float) doesn't seem be ordered. This happens on both AMD's
1855 return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type); 1853 // and Nvidia's proprietary stacks. Manually force an ordered comparison.
1856 } 1854 return {fmt::format("({} && !isnan({}) && !isnan({}))", expr.AsBool(),
1857 1855 VisitOperand(operation, 0).AsFloat(),
1858 template <Type type> 1856 VisitOperand(operation, 1).AsFloat()),
1859 Expression LogicalGreaterThan(Operation operation) { 1857 Type::Bool};
1860 return GenerateBinaryInfix(operation, ">", Type::Bool, type, type); 1858 }
1859 if constexpr (!unordered) {
1860 return expr;
1861 }
1862 // Unordered comparisons are always true for NaN operands.
1863 return {fmt::format("({} || isnan({}) || isnan({}))", expr.AsBool(),
1864 VisitOperand(operation, 0).AsFloat(),
1865 VisitOperand(operation, 1).AsFloat()),
1866 Type::Bool};
1861 } 1867 }
1862 1868
1863 template <Type type> 1869 Expression FOrdered(Operation operation) {
1864 Expression LogicalNotEqual(Operation operation) { 1870 return {fmt::format("(!isnan({}) && !isnan({}))", VisitOperand(operation, 0).AsFloat(),
1865 return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type); 1871 VisitOperand(operation, 1).AsFloat()),
1872 Type::Bool};
1866 } 1873 }
1867 1874
1868 template <Type type> 1875 Expression FUnordered(Operation operation) {
1869 Expression LogicalGreaterEqual(Operation operation) { 1876 return {fmt::format("(isnan({}) || isnan({}))", VisitOperand(operation, 0).AsFloat(),
1870 return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type); 1877 VisitOperand(operation, 1).AsFloat()),
1878 Type::Bool};
1871 } 1879 }
1872 1880
1873 Expression LogicalAddCarry(Operation operation) { 1881 Expression LogicalAddCarry(Operation operation) {
@@ -2303,6 +2311,18 @@ private:
2303 return {"gl_SubGroupInvocationARB", Type::Uint}; 2311 return {"gl_SubGroupInvocationARB", Type::Uint};
2304 } 2312 }
2305 2313
2314 template <const std::string_view& comparison>
2315 Expression ThreadMask(Operation) {
2316 if (device.HasWarpIntrinsics()) {
2317 return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
2318 }
2319 if (device.HasShaderBallot()) {
2320 return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
2321 }
2322 LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
2323 return {"0U", Type::Uint};
2324 }
2325
2306 Expression ShuffleIndexed(Operation operation) { 2326 Expression ShuffleIndexed(Operation operation) {
2307 std::string value = VisitOperand(operation, 0).AsFloat(); 2327 std::string value = VisitOperand(operation, 0).AsFloat();
2308 2328
@@ -2315,7 +2335,21 @@ private:
2315 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float}; 2335 return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
2316 } 2336 }
2317 2337
2318 Expression MemoryBarrierGL(Operation) { 2338 Expression Barrier(Operation) {
2339 if (!ir.IsDecompiled()) {
2340 LOG_ERROR(Render_OpenGL, "barrier() used but shader is not decompiled");
2341 return {};
2342 }
2343 code.AddLine("barrier();");
2344 return {};
2345 }
2346
2347 Expression MemoryBarrierGroup(Operation) {
2348 code.AddLine("groupMemoryBarrier();");
2349 return {};
2350 }
2351
2352 Expression MemoryBarrierGlobal(Operation) {
2319 code.AddLine("memoryBarrier();"); 2353 code.AddLine("memoryBarrier();");
2320 return {}; 2354 return {};
2321 } 2355 }
@@ -2324,6 +2358,19 @@ private:
2324 Func() = delete; 2358 Func() = delete;
2325 ~Func() = delete; 2359 ~Func() = delete;
2326 2360
2361 static constexpr std::string_view LessThan = "<";
2362 static constexpr std::string_view Equal = "==";
2363 static constexpr std::string_view LessEqual = "<=";
2364 static constexpr std::string_view GreaterThan = ">";
2365 static constexpr std::string_view NotEqual = "!=";
2366 static constexpr std::string_view GreaterEqual = ">=";
2367
2368 static constexpr std::string_view Eq = "Eq";
2369 static constexpr std::string_view Ge = "Ge";
2370 static constexpr std::string_view Gt = "Gt";
2371 static constexpr std::string_view Le = "Le";
2372 static constexpr std::string_view Lt = "Lt";
2373
2327 static constexpr std::string_view Add = "Add"; 2374 static constexpr std::string_view Add = "Add";
2328 static constexpr std::string_view Min = "Min"; 2375 static constexpr std::string_view Min = "Min";
2329 static constexpr std::string_view Max = "Max"; 2376 static constexpr std::string_view Max = "Max";
@@ -2425,27 +2472,34 @@ private:
2425 &GLSLDecompiler::LogicalPick2, 2472 &GLSLDecompiler::LogicalPick2,
2426 &GLSLDecompiler::LogicalAnd2, 2473 &GLSLDecompiler::LogicalAnd2,
2427 2474
2428 &GLSLDecompiler::LogicalLessThan<Type::Float>, 2475 &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, false>,
2429 &GLSLDecompiler::LogicalEqual<Type::Float>, 2476 &GLSLDecompiler::Comparison<Func::Equal, Type::Float, false>,
2430 &GLSLDecompiler::LogicalLessEqual<Type::Float>, 2477 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, false>,
2431 &GLSLDecompiler::LogicalGreaterThan<Type::Float>, 2478 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, false>,
2432 &GLSLDecompiler::LogicalNotEqual<Type::Float>, 2479 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, false>,
2433 &GLSLDecompiler::LogicalGreaterEqual<Type::Float>, 2480 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, false>,
2434 &GLSLDecompiler::LogicalFIsNan, 2481 &GLSLDecompiler::FOrdered,
2435 2482 &GLSLDecompiler::FUnordered,
2436 &GLSLDecompiler::LogicalLessThan<Type::Int>, 2483 &GLSLDecompiler::Comparison<Func::LessThan, Type::Float, true>,
2437 &GLSLDecompiler::LogicalEqual<Type::Int>, 2484 &GLSLDecompiler::Comparison<Func::Equal, Type::Float, true>,
2438 &GLSLDecompiler::LogicalLessEqual<Type::Int>, 2485 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Float, true>,
2439 &GLSLDecompiler::LogicalGreaterThan<Type::Int>, 2486 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Float, true>,
2440 &GLSLDecompiler::LogicalNotEqual<Type::Int>, 2487 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Float, true>,
2441 &GLSLDecompiler::LogicalGreaterEqual<Type::Int>, 2488 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Float, true>,
2442 2489
2443 &GLSLDecompiler::LogicalLessThan<Type::Uint>, 2490 &GLSLDecompiler::Comparison<Func::LessThan, Type::Int>,
2444 &GLSLDecompiler::LogicalEqual<Type::Uint>, 2491 &GLSLDecompiler::Comparison<Func::Equal, Type::Int>,
2445 &GLSLDecompiler::LogicalLessEqual<Type::Uint>, 2492 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Int>,
2446 &GLSLDecompiler::LogicalGreaterThan<Type::Uint>, 2493 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Int>,
2447 &GLSLDecompiler::LogicalNotEqual<Type::Uint>, 2494 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Int>,
2448 &GLSLDecompiler::LogicalGreaterEqual<Type::Uint>, 2495 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Int>,
2496
2497 &GLSLDecompiler::Comparison<Func::LessThan, Type::Uint>,
2498 &GLSLDecompiler::Comparison<Func::Equal, Type::Uint>,
2499 &GLSLDecompiler::Comparison<Func::LessEqual, Type::Uint>,
2500 &GLSLDecompiler::Comparison<Func::GreaterThan, Type::Uint>,
2501 &GLSLDecompiler::Comparison<Func::NotEqual, Type::Uint>,
2502 &GLSLDecompiler::Comparison<Func::GreaterEqual, Type::Uint>,
2449 2503
2450 &GLSLDecompiler::LogicalAddCarry, 2504 &GLSLDecompiler::LogicalAddCarry,
2451 2505
@@ -2534,9 +2588,16 @@ private:
2534 &GLSLDecompiler::VoteEqual, 2588 &GLSLDecompiler::VoteEqual,
2535 2589
2536 &GLSLDecompiler::ThreadId, 2590 &GLSLDecompiler::ThreadId,
2591 &GLSLDecompiler::ThreadMask<Func::Eq>,
2592 &GLSLDecompiler::ThreadMask<Func::Ge>,
2593 &GLSLDecompiler::ThreadMask<Func::Gt>,
2594 &GLSLDecompiler::ThreadMask<Func::Le>,
2595 &GLSLDecompiler::ThreadMask<Func::Lt>,
2537 &GLSLDecompiler::ShuffleIndexed, 2596 &GLSLDecompiler::ShuffleIndexed,
2538 2597
2539 &GLSLDecompiler::MemoryBarrierGL, 2598 &GLSLDecompiler::Barrier,
2599 &GLSLDecompiler::MemoryBarrierGroup,
2600 &GLSLDecompiler::MemoryBarrierGlobal,
2540 }; 2601 };
2541 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2602 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2542 2603
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 9c7b0adbd..8e754fa90 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -6,45 +6,109 @@
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "video_core/engines/maxwell_3d.h" 8#include "video_core/engines/maxwell_3d.h"
9#include "video_core/renderer_opengl/gl_device.h"
9#include "video_core/renderer_opengl/gl_shader_manager.h" 10#include "video_core/renderer_opengl/gl_shader_manager.h"
10 11
11namespace OpenGL::GLShader { 12namespace OpenGL {
12 13
13ProgramManager::ProgramManager() = default; 14ProgramManager::ProgramManager(const Device& device) {
15 use_assembly_programs = device.UseAssemblyShaders();
16 if (use_assembly_programs) {
17 glEnable(GL_COMPUTE_PROGRAM_NV);
18 } else {
19 graphics_pipeline.Create();
20 glBindProgramPipeline(graphics_pipeline.handle);
21 }
22}
14 23
15ProgramManager::~ProgramManager() = default; 24ProgramManager::~ProgramManager() = default;
16 25
17void ProgramManager::Create() { 26void ProgramManager::BindCompute(GLuint program) {
18 graphics_pipeline.Create(); 27 if (use_assembly_programs) {
19 glBindProgramPipeline(graphics_pipeline.handle); 28 glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
29 } else {
30 is_graphics_bound = false;
31 glUseProgram(program);
32 }
20} 33}
21 34
22void ProgramManager::BindGraphicsPipeline() { 35void ProgramManager::BindGraphicsPipeline() {
23 if (!is_graphics_bound) { 36 if (use_assembly_programs) {
24 is_graphics_bound = true; 37 UpdateAssemblyPrograms();
25 glUseProgram(0); 38 } else {
39 UpdateSourcePrograms();
26 } 40 }
41}
27 42
28 // Avoid updating the pipeline when values have no changed 43void ProgramManager::BindHostPipeline(GLuint pipeline) {
29 if (old_state == current_state) { 44 if (use_assembly_programs) {
30 return; 45 if (geometry_enabled) {
46 geometry_enabled = false;
47 old_state.geometry = 0;
48 glDisable(GL_GEOMETRY_PROGRAM_NV);
49 }
50 } else {
51 if (!is_graphics_bound) {
52 glUseProgram(0);
53 }
31 } 54 }
55 glBindProgramPipeline(pipeline);
56}
32 57
33 // Workaround for AMD bug 58void ProgramManager::RestoreGuestPipeline() {
34 static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | 59 if (use_assembly_programs) {
35 GL_FRAGMENT_SHADER_BIT}; 60 glBindProgramPipeline(0);
36 const GLuint handle = graphics_pipeline.handle; 61 } else {
37 glUseProgramStages(handle, all_used_stages, 0); 62 glBindProgramPipeline(graphics_pipeline.handle);
38 glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); 63 }
39 glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); 64}
40 glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); 65
66void ProgramManager::UpdateAssemblyPrograms() {
67 const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
68 if (current == old) {
69 return;
70 }
71 if (current == 0) {
72 if (enabled) {
73 enabled = false;
74 glDisable(stage);
75 }
76 return;
77 }
78 if (!enabled) {
79 enabled = true;
80 glEnable(stage);
81 }
82 glBindProgramARB(stage, current);
83 };
84
85 update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
86 update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
87 old_state.geometry);
88 update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
89 old_state.fragment);
41 90
42 old_state = current_state; 91 old_state = current_state;
43} 92}
44 93
45void ProgramManager::BindComputeShader(GLuint program) { 94void ProgramManager::UpdateSourcePrograms() {
46 is_graphics_bound = false; 95 if (!is_graphics_bound) {
47 glUseProgram(program); 96 is_graphics_bound = true;
97 glUseProgram(0);
98 }
99
100 const GLuint handle = graphics_pipeline.handle;
101 const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
102 if (current == old) {
103 return;
104 }
105 glUseProgramStages(handle, stage, current);
106 };
107 update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
108 update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
109 update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
110
111 old_state = current_state;
48} 112}
49 113
50void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { 114void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
@@ -54,4 +118,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
54 y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; 118 y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
55} 119}
56 120
57} // namespace OpenGL::GLShader 121} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index d2e47f2a9..0f03b4f12 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -11,7 +11,9 @@
11#include "video_core/renderer_opengl/gl_resource_manager.h" 11#include "video_core/renderer_opengl/gl_resource_manager.h"
12#include "video_core/renderer_opengl/maxwell_to_gl.h" 12#include "video_core/renderer_opengl/maxwell_to_gl.h"
13 13
14namespace OpenGL::GLShader { 14namespace OpenGL {
15
16class Device;
15 17
16/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned 18/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
17/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at 19/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
@@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
28 30
29class ProgramManager { 31class ProgramManager {
30public: 32public:
31 explicit ProgramManager(); 33 explicit ProgramManager(const Device& device);
32 ~ProgramManager(); 34 ~ProgramManager();
33 35
34 void Create(); 36 /// Binds a compute program
37 void BindCompute(GLuint program);
35 38
36 /// Updates the graphics pipeline and binds it. 39 /// Updates bound programs.
37 void BindGraphicsPipeline(); 40 void BindGraphicsPipeline();
38 41
39 /// Binds a compute shader. 42 /// Binds an OpenGL pipeline object unsynchronized with the guest state.
40 void BindComputeShader(GLuint program); 43 void BindHostPipeline(GLuint pipeline);
44
45 /// Rewinds BindHostPipeline state changes.
46 void RestoreGuestPipeline();
41 47
42 void UseVertexShader(GLuint program) { 48 void UseVertexShader(GLuint program) {
43 current_state.vertex_shader = program; 49 current_state.vertex = program;
44 } 50 }
45 51
46 void UseGeometryShader(GLuint program) { 52 void UseGeometryShader(GLuint program) {
47 current_state.geometry_shader = program; 53 current_state.geometry = program;
48 } 54 }
49 55
50 void UseFragmentShader(GLuint program) { 56 void UseFragmentShader(GLuint program) {
51 current_state.fragment_shader = program; 57 current_state.fragment = program;
52 } 58 }
53 59
54private: 60private:
55 struct PipelineState { 61 struct PipelineState {
56 bool operator==(const PipelineState& rhs) const noexcept { 62 GLuint vertex = 0;
57 return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && 63 GLuint geometry = 0;
58 geometry_shader == rhs.geometry_shader; 64 GLuint fragment = 0;
59 }
60
61 bool operator!=(const PipelineState& rhs) const noexcept {
62 return !operator==(rhs);
63 }
64
65 GLuint vertex_shader = 0;
66 GLuint fragment_shader = 0;
67 GLuint geometry_shader = 0;
68 }; 65 };
69 66
67 /// Update NV_gpu_program5 programs.
68 void UpdateAssemblyPrograms();
69
70 /// Update GLSL programs.
71 void UpdateSourcePrograms();
72
70 OGLPipeline graphics_pipeline; 73 OGLPipeline graphics_pipeline;
71 OGLPipeline compute_pipeline; 74
72 PipelineState current_state; 75 PipelineState current_state;
73 PipelineState old_state; 76 PipelineState old_state;
77
78 bool use_assembly_programs = false;
79
74 bool is_graphics_bound = true; 80 bool is_graphics_bound = true;
81
82 bool vertex_enabled = false;
83 bool geometry_enabled = false;
84 bool fragment_enabled = false;
75}; 85};
76 86
77} // namespace OpenGL::GLShader 87} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 94fbd2a22..4faa8b90c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -35,7 +35,7 @@ MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy",
35namespace { 35namespace {
36 36
37struct FormatTuple { 37struct FormatTuple {
38 GLint internal_format; 38 GLenum internal_format;
39 GLenum format = GL_NONE; 39 GLenum format = GL_NONE;
40 GLenum type = GL_NONE; 40 GLenum type = GL_NONE;
41}; 41};
@@ -238,6 +238,12 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
238 return texture; 238 return texture;
239} 239}
240 240
241constexpr u32 EncodeSwizzle(SwizzleSource x_source, SwizzleSource y_source, SwizzleSource z_source,
242 SwizzleSource w_source) {
243 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
244 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
245}
246
241} // Anonymous namespace 247} // Anonymous namespace
242 248
243CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, 249CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
@@ -381,7 +387,7 @@ void CachedSurface::DecorateSurfaceName() {
381} 387}
382 388
383void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { 389void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
384 LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix); 390 LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
385} 391}
386 392
387View CachedSurface::CreateView(const ViewParams& view_key) { 393View CachedSurface::CreateView(const ViewParams& view_key) {
@@ -397,14 +403,13 @@ View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_pr
397} 403}
398 404
399CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params, 405CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
400 const bool is_proxy) 406 bool is_proxy)
401 : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { 407 : VideoCommon::ViewBase(params), surface{surface},
402 target = GetTextureTarget(params.target); 408 format{GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format},
403 format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format; 409 target{GetTextureTarget(params.target)}, is_proxy{is_proxy} {
404 if (!is_proxy) { 410 if (!is_proxy) {
405 texture_view = CreateTextureView(); 411 main_view = CreateTextureView();
406 } 412 }
407 swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
408} 413}
409 414
410CachedSurfaceView::~CachedSurfaceView() = default; 415CachedSurfaceView::~CachedSurfaceView() = default;
@@ -447,27 +452,49 @@ void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
447 } 452 }
448} 453}
449 454
450void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source, 455GLuint CachedSurfaceView::GetTexture(SwizzleSource x_source, SwizzleSource y_source,
451 SwizzleSource z_source, SwizzleSource w_source) { 456 SwizzleSource z_source, SwizzleSource w_source) {
452 u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); 457 if (GetSurfaceParams().IsBuffer()) {
453 if (new_swizzle == swizzle) 458 return GetTexture();
454 return; 459 }
455 swizzle = new_swizzle; 460 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
456 const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), 461 if (current_swizzle == new_swizzle) {
457 GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; 462 return current_view;
458 const GLuint handle = GetTexture(); 463 }
459 const PixelFormat format = surface.GetSurfaceParams().pixel_format; 464 current_swizzle = new_swizzle;
460 switch (format) { 465
466 const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
467 OGLTextureView& view = entry->second;
468 if (!is_cache_miss) {
469 current_view = view.handle;
470 return view.handle;
471 }
472 view = CreateTextureView();
473 current_view = view.handle;
474
475 std::array swizzle{x_source, y_source, z_source, w_source};
476
477 switch (const PixelFormat format = GetSurfaceParams().pixel_format) {
461 case PixelFormat::Z24S8: 478 case PixelFormat::Z24S8:
462 case PixelFormat::Z32FS8: 479 case PixelFormat::Z32FS8:
463 case PixelFormat::S8Z24: 480 case PixelFormat::S8Z24:
464 glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, 481 UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G);
482 glTextureParameteri(view.handle, GL_DEPTH_STENCIL_TEXTURE_MODE,
465 GetComponent(format, x_source == SwizzleSource::R)); 483 GetComponent(format, x_source == SwizzleSource::R));
484
485 // Make sure we sample the first component
486 std::transform(swizzle.begin(), swizzle.end(), swizzle.begin(), [](SwizzleSource value) {
487 return value == SwizzleSource::G ? SwizzleSource::R : value;
488 });
489 [[fallthrough]];
490 default: {
491 const std::array gl_swizzle = {GetSwizzleSource(swizzle[0]), GetSwizzleSource(swizzle[1]),
492 GetSwizzleSource(swizzle[2]), GetSwizzleSource(swizzle[3])};
493 glTextureParameteriv(view.handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
466 break; 494 break;
467 default:
468 glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
469 break;
470 } 495 }
496 }
497 return view.handle;
471} 498}
472 499
473OGLTextureView CachedSurfaceView::CreateTextureView() const { 500OGLTextureView CachedSurfaceView::CreateTextureView() const {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 02d9981a1..8a2ac8603 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -83,7 +83,7 @@ public:
83 /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER 83 /// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
84 void Attach(GLenum attachment, GLenum target) const; 84 void Attach(GLenum attachment, GLenum target) const;
85 85
86 void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, 86 GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
87 Tegra::Texture::SwizzleSource y_source, 87 Tegra::Texture::SwizzleSource y_source,
88 Tegra::Texture::SwizzleSource z_source, 88 Tegra::Texture::SwizzleSource z_source,
89 Tegra::Texture::SwizzleSource w_source); 89 Tegra::Texture::SwizzleSource w_source);
@@ -98,7 +98,7 @@ public:
98 if (is_proxy) { 98 if (is_proxy) {
99 return surface.GetTexture(); 99 return surface.GetTexture();
100 } 100 }
101 return texture_view.handle; 101 return main_view.handle;
102 } 102 }
103 103
104 GLenum GetFormat() const { 104 GLenum GetFormat() const {
@@ -110,23 +110,19 @@ public:
110 } 110 }
111 111
112private: 112private:
113 u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
114 Tegra::Texture::SwizzleSource y_source,
115 Tegra::Texture::SwizzleSource z_source,
116 Tegra::Texture::SwizzleSource w_source) const {
117 return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) |
118 (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source);
119 }
120
121 OGLTextureView CreateTextureView() const; 113 OGLTextureView CreateTextureView() const;
122 114
123 CachedSurface& surface; 115 CachedSurface& surface;
124 GLenum target{}; 116 const GLenum format;
125 GLenum format{}; 117 const GLenum target;
118 const bool is_proxy;
119
120 std::unordered_map<u32, OGLTextureView> view_cache;
121 OGLTextureView main_view;
126 122
127 OGLTextureView texture_view; 123 // Use an invalid default so it always fails the comparison test
128 u32 swizzle{}; 124 u32 current_swizzle = 0xffffffff;
129 bool is_proxy{}; 125 GLuint current_view = 0;
130}; 126};
131 127
132class TextureCacheOpenGL final : public TextureCacheBase { 128class TextureCacheOpenGL final : public TextureCacheBase {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b2a179746..e7952924a 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,7 +316,7 @@ public:
316RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, 316RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
317 Core::Frontend::GraphicsContext& context) 317 Core::Frontend::GraphicsContext& context)
318 : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, 318 : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
319 has_debug_tool{HasDebugTool()} {} 319 program_manager{device}, has_debug_tool{HasDebugTool()} {}
320 320
321RendererOpenGL::~RendererOpenGL() = default; 321RendererOpenGL::~RendererOpenGL() = default;
322 322
@@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() {
468 vertex_program.Create(true, false, vertex_shader.handle); 468 vertex_program.Create(true, false, vertex_shader.handle);
469 fragment_program.Create(true, false, fragment_shader.handle); 469 fragment_program.Create(true, false, fragment_shader.handle);
470 470
471 // Create program pipeline 471 pipeline.Create();
472 program_manager.Create(); 472 glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
473 glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
473 474
474 // Generate VBO handle for drawing 475 // Generate VBO handle for drawing
475 vertex_buffer.Create(); 476 vertex_buffer.Create();
@@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() {
508 if (rasterizer) { 509 if (rasterizer) {
509 return; 510 return;
510 } 511 }
511 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, 512 rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
512 program_manager, state_tracker); 513 program_manager, state_tracker);
513} 514}
514 515
@@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
620 state_tracker.NotifyClipControl(); 621 state_tracker.NotifyClipControl();
621 state_tracker.NotifyAlphaTest(); 622 state_tracker.NotifyAlphaTest();
622 623
623 program_manager.UseVertexShader(vertex_program.handle); 624 program_manager.BindHostPipeline(pipeline.handle);
624 program_manager.UseGeometryShader(0);
625 program_manager.UseFragmentShader(fragment_program.handle);
626 program_manager.BindGraphicsPipeline();
627 625
628 glEnable(GL_CULL_FACE); 626 glEnable(GL_CULL_FACE);
629 if (screen_info.display_srgb) { 627 if (screen_info.display_srgb) {
@@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
665 663
666 glClear(GL_COLOR_BUFFER_BIT); 664 glClear(GL_COLOR_BUFFER_BIT);
667 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); 665 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
666
667 program_manager.RestoreGuestPipeline();
668} 668}
669 669
670bool RendererOpenGL::TryPresent(int timeout_ms) { 670bool RendererOpenGL::TryPresent(int timeout_ms) {
@@ -753,6 +753,9 @@ void RendererOpenGL::RenderScreenshot() {
753bool RendererOpenGL::Init() { 753bool RendererOpenGL::Init() {
754 if (GLAD_GL_KHR_debug) { 754 if (GLAD_GL_KHR_debug) {
755 glEnable(GL_DEBUG_OUTPUT); 755 glEnable(GL_DEBUG_OUTPUT);
756 if (Settings::values.renderer_debug) {
757 glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
758 }
756 glDebugMessageCallback(DebugHandler, nullptr); 759 glDebugMessageCallback(DebugHandler, nullptr);
757 } 760 }
758 761
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 50b647661..61bf507f4 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/math_util.h" 10#include "common/math_util.h"
11#include "video_core/renderer_base.h" 11#include "video_core/renderer_base.h"
12#include "video_core/renderer_opengl/gl_device.h"
12#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
13#include "video_core/renderer_opengl/gl_shader_manager.h" 14#include "video_core/renderer_opengl/gl_shader_manager.h"
14#include "video_core/renderer_opengl/gl_state_tracker.h" 15#include "video_core/renderer_opengl/gl_state_tracker.h"
@@ -95,6 +96,7 @@ private:
95 Core::Frontend::EmuWindow& emu_window; 96 Core::Frontend::EmuWindow& emu_window;
96 Core::System& system; 97 Core::System& system;
97 Core::Frontend::GraphicsContext& context; 98 Core::Frontend::GraphicsContext& context;
99 const Device device;
98 100
99 StateTracker state_tracker{system}; 101 StateTracker state_tracker{system};
100 102
@@ -102,13 +104,14 @@ private:
102 OGLBuffer vertex_buffer; 104 OGLBuffer vertex_buffer;
103 OGLProgram vertex_program; 105 OGLProgram vertex_program;
104 OGLProgram fragment_program; 106 OGLProgram fragment_program;
107 OGLPipeline pipeline;
105 OGLFramebuffer screenshot_framebuffer; 108 OGLFramebuffer screenshot_framebuffer;
106 109
107 /// Display information for Switch screen 110 /// Display information for Switch screen
108 ScreenInfo screen_info; 111 ScreenInfo screen_info;
109 112
110 /// Global dummy shader pipeline 113 /// Global dummy shader pipeline
111 GLShader::ProgramManager program_manager; 114 ProgramManager program_manager;
112 115
113 /// OpenGL framebuffer data 116 /// OpenGL framebuffer data
114 std::vector<u8> gl_framebuffer_data; 117 std::vector<u8> gl_framebuffer_data;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 12be691a5..2871035f5 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -142,7 +142,7 @@ struct FormatTuple {
142 {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16 142 {VK_FORMAT_BC6H_UFLOAT_BLOCK}, // BC6H_UF16
143 {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16 143 {VK_FORMAT_BC6H_SFLOAT_BLOCK}, // BC6H_SF16
144 {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4 144 {VK_FORMAT_ASTC_4x4_UNORM_BLOCK}, // ASTC_2D_4X4
145 {VK_FORMAT_B8G8R8A8_UNORM}, // BGRA8 145 {VK_FORMAT_B8G8R8A8_UNORM, Attachable}, // BGRA8
146 {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F 146 {VK_FORMAT_R32G32B32A32_SFLOAT, Attachable | Storage}, // RGBA32F
147 {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F 147 {VK_FORMAT_R32G32_SFLOAT, Attachable | Storage}, // RG32F
148 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F 148 {VK_FORMAT_R32_SFLOAT, Attachable | Storage}, // R32F
@@ -168,7 +168,7 @@ struct FormatTuple {
168 {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8 168 {VK_FORMAT_ASTC_8x8_UNORM_BLOCK}, // ASTC_2D_8X8
169 {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5 169 {VK_FORMAT_UNDEFINED}, // ASTC_2D_8X5
170 {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4 170 {VK_FORMAT_UNDEFINED}, // ASTC_2D_5X4
171 {VK_FORMAT_UNDEFINED}, // BGRA8_SRGB 171 {VK_FORMAT_B8G8R8A8_SRGB, Attachable}, // BGRA8_SRGB
172 {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB 172 {VK_FORMAT_BC1_RGBA_SRGB_BLOCK}, // DXT1_SRGB
173 {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB 173 {VK_FORMAT_BC2_SRGB_BLOCK}, // DXT23_SRGB
174 {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB 174 {VK_FORMAT_BC3_SRGB_BLOCK}, // DXT45_SRGB
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5b494da8c..5f33d9e40 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,6 +7,7 @@
7#include <memory> 7#include <memory>
8 8
9#include "core/core.h" 9#include "core/core.h"
10#include "video_core/buffer_cache/buffer_cache.h"
10#include "video_core/renderer_vulkan/vk_buffer_cache.h" 11#include "video_core/renderer_vulkan/vk_buffer_cache.h"
11#include "video_core/renderer_vulkan/vk_device.h" 12#include "video_core/renderer_vulkan/vk_device.h"
12#include "video_core/renderer_vulkan/vk_scheduler.h" 13#include "video_core/renderer_vulkan/vk_scheduler.h"
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index f0c491d00..750e5a0ca 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -104,6 +104,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
104 VK_FORMAT_R16_SFLOAT, 104 VK_FORMAT_R16_SFLOAT,
105 VK_FORMAT_R16G16B16A16_SFLOAT, 105 VK_FORMAT_R16G16B16A16_SFLOAT,
106 VK_FORMAT_B8G8R8A8_UNORM, 106 VK_FORMAT_B8G8R8A8_UNORM,
107 VK_FORMAT_B8G8R8A8_SRGB,
107 VK_FORMAT_R4G4B4A4_UNORM_PACK16, 108 VK_FORMAT_R4G4B4A4_UNORM_PACK16,
108 VK_FORMAT_D32_SFLOAT, 109 VK_FORMAT_D32_SFLOAT,
109 VK_FORMAT_D16_UNORM, 110 VK_FORMAT_D16_UNORM,
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 04d07fe6a..043fe7947 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -7,6 +7,7 @@
7#include <memory> 7#include <memory>
8 8
9#include "video_core/fence_manager.h" 9#include "video_core/fence_manager.h"
10#include "video_core/renderer_vulkan/vk_buffer_cache.h"
10#include "video_core/renderer_vulkan/wrapper.h" 11#include "video_core/renderer_vulkan/wrapper.h"
11 12
12namespace Core { 13namespace Core {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 890175d2d..65a1c6245 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -331,8 +331,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
331 331
332 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); 332 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
333 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); 333 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
334 ASSERT(cpu_addr); 334 const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
335 const auto shader = TryGet(*cpu_addr);
336 ASSERT(shader); 335 ASSERT(shader);
337 336
338 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 337 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index cf15e6d1c..a3d992ed3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -532,14 +532,14 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
532 return; 532 return;
533 } 533 }
534 texture_cache.OnCPUWrite(addr, size); 534 texture_cache.OnCPUWrite(addr, size);
535 pipeline_cache.InvalidateRegion(addr, size); 535 pipeline_cache.OnCPUWrite(addr, size);
536 buffer_cache.OnCPUWrite(addr, size); 536 buffer_cache.OnCPUWrite(addr, size);
537 query_cache.InvalidateRegion(addr, size);
538} 537}
539 538
540void RasterizerVulkan::SyncGuestHost() { 539void RasterizerVulkan::SyncGuestHost() {
541 texture_cache.SyncGuestHost(); 540 texture_cache.SyncGuestHost();
542 buffer_cache.SyncGuestHost(); 541 buffer_cache.SyncGuestHost();
542 pipeline_cache.SyncGuestHost();
543} 543}
544 544
545void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { 545void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
@@ -569,7 +569,9 @@ void RasterizerVulkan::ReleaseFences() {
569} 569}
570 570
571void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { 571void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
572 FlushRegion(addr, size); 572 if (Settings::IsGPULevelExtreme()) {
573 FlushRegion(addr, size);
574 }
573 InvalidateRegion(addr, size); 575 InvalidateRegion(addr, size);
574} 576}
575 577
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 6ce6bfcb5..a13e8baa7 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -515,6 +515,16 @@ private:
515 void DeclareCommon() { 515 void DeclareCommon() {
516 thread_id = 516 thread_id =
517 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id"); 517 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
518 thread_masks[0] =
519 DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
520 thread_masks[1] =
521 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
522 thread_masks[2] =
523 DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
524 thread_masks[3] =
525 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
526 thread_masks[4] =
527 DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
518 } 528 }
519 529
520 void DeclareVertex() { 530 void DeclareVertex() {
@@ -1077,8 +1087,7 @@ private:
1077 1087
1078 void VisitBasicBlock(const NodeBlock& bb) { 1088 void VisitBasicBlock(const NodeBlock& bb) {
1079 for (const auto& node : bb) { 1089 for (const auto& node : bb) {
1080 [[maybe_unused]] const Type type = Visit(node).type; 1090 Visit(node);
1081 ASSERT(type == Type::Void);
1082 } 1091 }
1083 } 1092 }
1084 1093
@@ -1372,7 +1381,9 @@ private:
1372 Expression target{}; 1381 Expression target{};
1373 if (const auto gpr = std::get_if<GprNode>(&*dest)) { 1382 if (const auto gpr = std::get_if<GprNode>(&*dest)) {
1374 if (gpr->GetIndex() == Register::ZeroIndex) { 1383 if (gpr->GetIndex() == Register::ZeroIndex) {
1375 // Writing to Register::ZeroIndex is a no op 1384 // Writing to Register::ZeroIndex is a no op but we still have to visit its source
1385 // because it might have side effects.
1386 Visit(src);
1376 return {}; 1387 return {};
1377 } 1388 }
1378 target = {registers.at(gpr->GetIndex()), Type::Float}; 1389 target = {registers.at(gpr->GetIndex()), Type::Float};
@@ -1628,6 +1639,24 @@ private:
1628 return {}; 1639 return {};
1629 } 1640 }
1630 1641
1642 Expression LogicalFOrdered(Operation operation) {
1643 // Emulate SPIR-V's OpOrdered
1644 const Id op_a = AsFloat(Visit(operation[0]));
1645 const Id op_b = AsFloat(Visit(operation[1]));
1646 const Id is_num_a = OpFOrdEqual(t_bool, op_a, op_a);
1647 const Id is_num_b = OpFOrdEqual(t_bool, op_b, op_b);
1648 return {OpLogicalAnd(t_bool, is_num_a, is_num_b), Type::Bool};
1649 }
1650
1651 Expression LogicalFUnordered(Operation operation) {
1652 // Emulate SPIR-V's OpUnordered
1653 const Id op_a = AsFloat(Visit(operation[0]));
1654 const Id op_b = AsFloat(Visit(operation[1]));
1655 const Id is_nan_a = OpIsNan(t_bool, op_a);
1656 const Id is_nan_b = OpIsNan(t_bool, op_b);
1657 return {OpLogicalOr(t_bool, is_nan_a, is_nan_b), Type::Bool};
1658 }
1659
1631 Id GetTextureSampler(Operation operation) { 1660 Id GetTextureSampler(Operation operation) {
1632 const auto& meta = std::get<MetaTexture>(operation.GetMeta()); 1661 const auto& meta = std::get<MetaTexture>(operation.GetMeta());
1633 ASSERT(!meta.sampler.is_buffer); 1662 ASSERT(!meta.sampler.is_buffer);
@@ -2167,14 +2196,37 @@ private:
2167 return {OpLoad(t_uint, thread_id), Type::Uint}; 2196 return {OpLoad(t_uint, thread_id), Type::Uint};
2168 } 2197 }
2169 2198
2199 template <std::size_t index>
2200 Expression ThreadMask(Operation) {
2201 // TODO(Rodrigo): Handle devices with different warp sizes
2202 const Id mask = thread_masks[index];
2203 return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
2204 }
2205
2170 Expression ShuffleIndexed(Operation operation) { 2206 Expression ShuffleIndexed(Operation operation) {
2171 const Id value = AsFloat(Visit(operation[0])); 2207 const Id value = AsFloat(Visit(operation[0]));
2172 const Id index = AsUint(Visit(operation[1])); 2208 const Id index = AsUint(Visit(operation[1]));
2173 return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float}; 2209 return {OpSubgroupReadInvocationKHR(t_float, value, index), Type::Float};
2174 } 2210 }
2175 2211
2176 Expression MemoryBarrierGL(Operation) { 2212 Expression Barrier(Operation) {
2177 const auto scope = spv::Scope::Device; 2213 if (!ir.IsDecompiled()) {
2214 LOG_ERROR(Render_Vulkan, "OpBarrier used by shader is not decompiled");
2215 return {};
2216 }
2217
2218 const auto scope = spv::Scope::Workgroup;
2219 const auto memory = spv::Scope::Workgroup;
2220 const auto semantics =
2221 spv::MemorySemanticsMask::WorkgroupMemory | spv::MemorySemanticsMask::AcquireRelease;
2222 OpControlBarrier(Constant(t_uint, static_cast<u32>(scope)),
2223 Constant(t_uint, static_cast<u32>(memory)),
2224 Constant(t_uint, static_cast<u32>(semantics)));
2225 return {};
2226 }
2227
2228 template <spv::Scope scope>
2229 Expression MemoryBarrier(Operation) {
2178 const auto semantics = 2230 const auto semantics =
2179 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory | 2231 spv::MemorySemanticsMask::AcquireRelease | spv::MemorySemanticsMask::UniformMemory |
2180 spv::MemorySemanticsMask::WorkgroupMemory | 2232 spv::MemorySemanticsMask::WorkgroupMemory |
@@ -2521,7 +2573,14 @@ private:
2521 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>, 2573 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThan, Type::Bool, Type::Float>,
2522 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>, 2574 &SPIRVDecompiler::Binary<&Module::OpFOrdNotEqual, Type::Bool, Type::Float>,
2523 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>, 2575 &SPIRVDecompiler::Binary<&Module::OpFOrdGreaterThanEqual, Type::Bool, Type::Float>,
2524 &SPIRVDecompiler::Unary<&Module::OpIsNan, Type::Bool, Type::Float>, 2576 &SPIRVDecompiler::LogicalFOrdered,
2577 &SPIRVDecompiler::LogicalFUnordered,
2578 &SPIRVDecompiler::Binary<&Module::OpFUnordLessThan, Type::Bool, Type::Float>,
2579 &SPIRVDecompiler::Binary<&Module::OpFUnordEqual, Type::Bool, Type::Float>,
2580 &SPIRVDecompiler::Binary<&Module::OpFUnordLessThanEqual, Type::Bool, Type::Float>,
2581 &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThan, Type::Bool, Type::Float>,
2582 &SPIRVDecompiler::Binary<&Module::OpFUnordNotEqual, Type::Bool, Type::Float>,
2583 &SPIRVDecompiler::Binary<&Module::OpFUnordGreaterThanEqual, Type::Bool, Type::Float>,
2525 2584
2526 &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>, 2585 &SPIRVDecompiler::Binary<&Module::OpSLessThan, Type::Bool, Type::Int>,
2527 &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>, 2586 &SPIRVDecompiler::Binary<&Module::OpIEqual, Type::Bool, Type::Int>,
@@ -2624,9 +2683,16 @@ private:
2624 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>, 2683 &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,
2625 2684
2626 &SPIRVDecompiler::ThreadId, 2685 &SPIRVDecompiler::ThreadId,
2686 &SPIRVDecompiler::ThreadMask<0>, // Eq
2687 &SPIRVDecompiler::ThreadMask<1>, // Ge
2688 &SPIRVDecompiler::ThreadMask<2>, // Gt
2689 &SPIRVDecompiler::ThreadMask<3>, // Le
2690 &SPIRVDecompiler::ThreadMask<4>, // Lt
2627 &SPIRVDecompiler::ShuffleIndexed, 2691 &SPIRVDecompiler::ShuffleIndexed,
2628 2692
2629 &SPIRVDecompiler::MemoryBarrierGL, 2693 &SPIRVDecompiler::Barrier,
2694 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Workgroup>,
2695 &SPIRVDecompiler::MemoryBarrier<spv::Scope::Device>,
2630 }; 2696 };
2631 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); 2697 static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
2632 2698
@@ -2748,6 +2814,7 @@ private:
2748 Id workgroup_id{}; 2814 Id workgroup_id{};
2749 Id local_invocation_id{}; 2815 Id local_invocation_id{};
2750 Id thread_id{}; 2816 Id thread_id{};
2817 std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt
2751 2818
2752 VertexIndices in_indices; 2819 VertexIndices in_indices;
2753 VertexIndices out_indices; 2820 VertexIndices out_indices;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 55f43e61b..2f1d5021d 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -354,26 +354,23 @@ CachedSurfaceView::~CachedSurfaceView() = default;
354 354
355VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, 355VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source,
356 SwizzleSource z_source, SwizzleSource w_source) { 356 SwizzleSource z_source, SwizzleSource w_source) {
357 const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); 357 const u32 new_swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source);
358 if (last_image_view && last_swizzle == swizzle) { 358 if (last_image_view && last_swizzle == new_swizzle) {
359 return last_image_view; 359 return last_image_view;
360 } 360 }
361 last_swizzle = swizzle; 361 last_swizzle = new_swizzle;
362 362
363 const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle); 363 const auto [entry, is_cache_miss] = view_cache.try_emplace(new_swizzle);
364 auto& image_view = entry->second; 364 auto& image_view = entry->second;
365 if (!is_cache_miss) { 365 if (!is_cache_miss) {
366 return last_image_view = *image_view; 366 return last_image_view = *image_view;
367 } 367 }
368 368
369 auto swizzle_x = MaxwellToVK::SwizzleSource(x_source); 369 std::array swizzle{MaxwellToVK::SwizzleSource(x_source), MaxwellToVK::SwizzleSource(y_source),
370 auto swizzle_y = MaxwellToVK::SwizzleSource(y_source); 370 MaxwellToVK::SwizzleSource(z_source), MaxwellToVK::SwizzleSource(w_source)};
371 auto swizzle_z = MaxwellToVK::SwizzleSource(z_source);
372 auto swizzle_w = MaxwellToVK::SwizzleSource(w_source);
373
374 if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { 371 if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) {
375 // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here. 372 // A1B5G5R5 is implemented as A1R5G5B5, we have to change the swizzle here.
376 std::swap(swizzle_x, swizzle_z); 373 std::swap(swizzle[0], swizzle[2]);
377 } 374 }
378 375
379 // Games can sample depth or stencil values on textures. This is decided by the swizzle value on 376 // Games can sample depth or stencil values on textures. This is decided by the swizzle value on
@@ -395,11 +392,11 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
395 UNIMPLEMENTED(); 392 UNIMPLEMENTED();
396 } 393 }
397 394
398 // Vulkan doesn't seem to understand swizzling of a depth stencil image, use identity 395 // Make sure we sample the first component
399 swizzle_x = VK_COMPONENT_SWIZZLE_R; 396 std::transform(
400 swizzle_y = VK_COMPONENT_SWIZZLE_G; 397 swizzle.begin(), swizzle.end(), swizzle.begin(), [](VkComponentSwizzle component) {
401 swizzle_z = VK_COMPONENT_SWIZZLE_B; 398 return component == VK_COMPONENT_SWIZZLE_G ? VK_COMPONENT_SWIZZLE_R : component;
402 swizzle_w = VK_COMPONENT_SWIZZLE_A; 399 });
403 } 400 }
404 401
405 VkImageViewCreateInfo ci; 402 VkImageViewCreateInfo ci;
@@ -409,7 +406,7 @@ VkImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y
409 ci.image = surface.GetImageHandle(); 406 ci.image = surface.GetImageHandle();
410 ci.viewType = image_view_type; 407 ci.viewType = image_view_type;
411 ci.format = surface.GetImage().GetFormat(); 408 ci.format = surface.GetImage().GetFormat();
412 ci.components = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; 409 ci.components = {swizzle[0], swizzle[1], swizzle[2], swizzle[3]};
413 ci.subresourceRange.aspectMask = aspect; 410 ci.subresourceRange.aspectMask = aspect;
414 ci.subresourceRange.baseMipLevel = base_level; 411 ci.subresourceRange.baseMipLevel = base_level;
415 ci.subresourceRange.levelCount = num_levels; 412 ci.subresourceRange.levelCount = num_levels;
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index a75a5cc63..eeac328a6 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -255,7 +255,7 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
255 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); 255 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
256 Node op_b = Immediate(branch_case.cmp_value); 256 Node op_b = Immediate(branch_case.cmp_value);
257 Node condition = 257 Node condition =
258 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); 258 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b);
259 auto result = Conditional(condition, {n}); 259 auto result = Conditional(condition, {n});
260 bb.push_back(result); 260 bb.push_back(result);
261 global_code.push_back(result); 261 global_code.push_back(result);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 9392f065b..63adbc4a3 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -387,7 +387,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
387 } 387 }
388 case OpCode::Id::RED: { 388 case OpCode::Id::RED: {
389 UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); 389 UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
390 UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
391 const auto [real_address, base_address, descriptor] = 390 const auto [real_address, base_address, descriptor] =
392 TrackGlobalMemory(bb, instr, true, true); 391 TrackGlobalMemory(bb, instr, true, true);
393 if (!real_address || !base_address) { 392 if (!real_address || !base_address) {
@@ -396,7 +395,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
396 } 395 }
397 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 396 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
398 Node value = GetRegister(instr.gpr0); 397 Node value = GetRegister(instr.gpr0);
399 bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); 398 bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
400 break; 399 break;
401 } 400 }
402 case OpCode::Id::ATOM: { 401 case OpCode::Id::ATOM: {
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d4f95b18c..d00e10913 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
109 return Operation(OperationCode::WorkGroupIdY); 109 return Operation(OperationCode::WorkGroupIdY);
110 case SystemVariable::CtaIdZ: 110 case SystemVariable::CtaIdZ:
111 return Operation(OperationCode::WorkGroupIdZ); 111 return Operation(OperationCode::WorkGroupIdZ);
112 case SystemVariable::EqMask:
113 case SystemVariable::LtMask:
114 case SystemVariable::LeMask:
115 case SystemVariable::GtMask:
116 case SystemVariable::GeMask:
117 uses_warps = true;
118 switch (instr.sys20) {
119 case SystemVariable::EqMask:
120 return Operation(OperationCode::ThreadEqMask);
121 case SystemVariable::LtMask:
122 return Operation(OperationCode::ThreadLtMask);
123 case SystemVariable::LeMask:
124 return Operation(OperationCode::ThreadLeMask);
125 case SystemVariable::GtMask:
126 return Operation(OperationCode::ThreadGtMask);
127 case SystemVariable::GeMask:
128 return Operation(OperationCode::ThreadGeMask);
129 default:
130 UNREACHABLE();
131 return Immediate(0u);
132 }
112 default: 133 default:
113 UNIMPLEMENTED_MSG("Unhandled system move: {}", 134 UNIMPLEMENTED_MSG("Unhandled system move: {}",
114 static_cast<u32>(instr.sys20.Value())); 135 static_cast<u32>(instr.sys20.Value()));
@@ -272,10 +293,25 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
272 SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); 293 SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
273 break; 294 break;
274 } 295 }
296 case OpCode::Id::BAR: {
297 UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
298 bb.push_back(Operation(OperationCode::Barrier));
299 break;
300 }
275 case OpCode::Id::MEMBAR: { 301 case OpCode::Id::MEMBAR: {
276 UNIMPLEMENTED_IF(instr.membar.type != Tegra::Shader::MembarType::GL);
277 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); 302 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
278 bb.push_back(Operation(OperationCode::MemoryBarrierGL)); 303 const OperationCode type = [instr] {
304 switch (instr.membar.type) {
305 case Tegra::Shader::MembarType::CTA:
306 return OperationCode::MemoryBarrierGroup;
307 case Tegra::Shader::MembarType::GL:
308 return OperationCode::MemoryBarrierGlobal;
309 default:
310 UNIMPLEMENTED_MSG("MEMBAR type={}", static_cast<int>(instr.membar.type.Value()));
311 return OperationCode::MemoryBarrierGlobal;
312 }
313 }();
314 bb.push_back(Operation(type));
279 break; 315 break;
280 } 316 }
281 case OpCode::Id::DEPBAR: { 317 case OpCode::Id::DEPBAR: {
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 6191ffba1..c83dc6615 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -97,19 +97,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
97 return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); 97 return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
98 } 98 }
99 case Tegra::Shader::XmadMode::CSfu: { 99 case Tegra::Shader::XmadMode::CSfu: {
100 const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a, 100 const Node comp_a =
101 op_a, Immediate(0)); 101 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
102 const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b, 102 const Node comp_b =
103 op_b, Immediate(0)); 103 GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
104 const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); 104 const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
105 105
106 const Node comp_minus_a = GetPredicateComparisonInteger( 106 const Node comp_minus_a = GetPredicateComparisonInteger(
107 PredCondition::NotEqual, is_signed_a, 107 PredCondition::NE, is_signed_a,
108 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, 108 SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
109 Immediate(0x80000000)), 109 Immediate(0x80000000)),
110 Immediate(0)); 110 Immediate(0));
111 const Node comp_minus_b = GetPredicateComparisonInteger( 111 const Node comp_minus_b = GetPredicateComparisonInteger(
112 PredCondition::NotEqual, is_signed_b, 112 PredCondition::NE, is_signed_b,
113 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, 113 SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
114 Immediate(0x80000000)), 114 Immediate(0x80000000)),
115 Immediate(0)); 115 Immediate(0));
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 601c822d2..c5e5165ff 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -110,13 +110,20 @@ enum class OperationCode {
110 LogicalPick2, /// (bool2 pair, uint index) -> bool 110 LogicalPick2, /// (bool2 pair, uint index) -> bool
111 LogicalAnd2, /// (bool2 a) -> bool 111 LogicalAnd2, /// (bool2 a) -> bool
112 112
113 LogicalFLessThan, /// (float a, float b) -> bool 113 LogicalFOrdLessThan, /// (float a, float b) -> bool
114 LogicalFEqual, /// (float a, float b) -> bool 114 LogicalFOrdEqual, /// (float a, float b) -> bool
115 LogicalFLessEqual, /// (float a, float b) -> bool 115 LogicalFOrdLessEqual, /// (float a, float b) -> bool
116 LogicalFGreaterThan, /// (float a, float b) -> bool 116 LogicalFOrdGreaterThan, /// (float a, float b) -> bool
117 LogicalFNotEqual, /// (float a, float b) -> bool 117 LogicalFOrdNotEqual, /// (float a, float b) -> bool
118 LogicalFGreaterEqual, /// (float a, float b) -> bool 118 LogicalFOrdGreaterEqual, /// (float a, float b) -> bool
119 LogicalFIsNan, /// (float a) -> bool 119 LogicalFOrdered, /// (float a, float b) -> bool
120 LogicalFUnordered, /// (float a, float b) -> bool
121 LogicalFUnordLessThan, /// (float a, float b) -> bool
122 LogicalFUnordEqual, /// (float a, float b) -> bool
123 LogicalFUnordLessEqual, /// (float a, float b) -> bool
124 LogicalFUnordGreaterThan, /// (float a, float b) -> bool
125 LogicalFUnordNotEqual, /// (float a, float b) -> bool
126 LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
120 127
121 LogicalILessThan, /// (int a, int b) -> bool 128 LogicalILessThan, /// (int a, int b) -> bool
122 LogicalIEqual, /// (int a, int b) -> bool 129 LogicalIEqual, /// (int a, int b) -> bool
@@ -219,9 +226,16 @@ enum class OperationCode {
219 VoteEqual, /// (bool) -> bool 226 VoteEqual, /// (bool) -> bool
220 227
221 ThreadId, /// () -> uint 228 ThreadId, /// () -> uint
229 ThreadEqMask, /// () -> uint
230 ThreadGeMask, /// () -> uint
231 ThreadGtMask, /// () -> uint
232 ThreadLeMask, /// () -> uint
233 ThreadLtMask, /// () -> uint
222 ShuffleIndexed, /// (uint value, uint index) -> uint 234 ShuffleIndexed, /// (uint value, uint index) -> uint
223 235
224 MemoryBarrierGL, /// () -> void 236 Barrier, /// () -> void
237 MemoryBarrierGroup, /// () -> void
238 MemoryBarrierGlobal, /// () -> void
225 239
226 Amount, 240 Amount,
227}; 241};
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 822674926..e322c3402 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -10,6 +10,7 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "common/logging/log.h" 11#include "common/logging/log.h"
12#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
13#include "video_core/shader/node.h"
13#include "video_core/shader/node_helper.h" 14#include "video_core/shader/node_helper.h"
14#include "video_core/shader/registry.h" 15#include "video_core/shader/registry.h"
15#include "video_core/shader/shader_ir.h" 16#include "video_core/shader/shader_ir.h"
@@ -243,56 +244,44 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
243} 244}
244 245
245Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { 246Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
247 if (condition == PredCondition::T) {
248 return GetPredicate(true);
249 } else if (condition == PredCondition::F) {
250 return GetPredicate(false);
251 }
252
246 static constexpr std::array comparison_table{ 253 static constexpr std::array comparison_table{
247 std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan}, 254 OperationCode(0),
248 std::pair{PredCondition::Equal, OperationCode::LogicalFEqual}, 255 OperationCode::LogicalFOrdLessThan, // LT
249 std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, 256 OperationCode::LogicalFOrdEqual, // EQ
250 std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, 257 OperationCode::LogicalFOrdLessEqual, // LE
251 std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, 258 OperationCode::LogicalFOrdGreaterThan, // GT
252 std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, 259 OperationCode::LogicalFOrdNotEqual, // NE
253 std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, 260 OperationCode::LogicalFOrdGreaterEqual, // GE
254 std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, 261 OperationCode::LogicalFOrdered, // NUM
255 std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, 262 OperationCode::LogicalFUnordered, // NAN
256 std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, 263 OperationCode::LogicalFUnordLessThan, // LTU
257 std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}, 264 OperationCode::LogicalFUnordEqual, // EQU
265 OperationCode::LogicalFUnordLessEqual, // LEU
266 OperationCode::LogicalFUnordGreaterThan, // GTU
267 OperationCode::LogicalFUnordNotEqual, // NEU
268 OperationCode::LogicalFUnordGreaterEqual, // GEU
258 }; 269 };
270 const std::size_t index = static_cast<std::size_t>(condition);
271 ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
259 272
260 const auto comparison = 273 return Operation(comparison_table[index], op_a, op_b);
261 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
262 [condition](const auto entry) { return condition == entry.first; });
263 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
264 "Unknown predicate comparison operation");
265
266 Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
267
268 if (condition == PredCondition::LessThanWithNan ||
269 condition == PredCondition::NotEqualWithNan ||
270 condition == PredCondition::LessEqualWithNan ||
271 condition == PredCondition::GreaterThanWithNan ||
272 condition == PredCondition::GreaterEqualWithNan) {
273 predicate = Operation(OperationCode::LogicalOr, predicate,
274 Operation(OperationCode::LogicalFIsNan, op_a));
275 predicate = Operation(OperationCode::LogicalOr, predicate,
276 Operation(OperationCode::LogicalFIsNan, op_b));
277 }
278
279 return predicate;
280} 274}
281 275
282Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, 276Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
283 Node op_b) { 277 Node op_b) {
284 static constexpr std::array comparison_table{ 278 static constexpr std::array comparison_table{
285 std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan}, 279 std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
286 std::pair{PredCondition::Equal, OperationCode::LogicalIEqual}, 280 std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
287 std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual}, 281 std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
288 std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, 282 std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
289 std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual}, 283 std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
290 std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, 284 std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
291 std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
292 std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
293 std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
294 std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
295 std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
296 }; 285 };
297 286
298 const auto comparison = 287 const auto comparison =
@@ -301,32 +290,24 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
301 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), 290 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
302 "Unknown predicate comparison operation"); 291 "Unknown predicate comparison operation");
303 292
304 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), 293 return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
305 std::move(op_b)); 294 std::move(op_b));
306
307 UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
308 condition == PredCondition::NotEqualWithNan ||
309 condition == PredCondition::LessEqualWithNan ||
310 condition == PredCondition::GreaterThanWithNan ||
311 condition == PredCondition::GreaterEqualWithNan,
312 "NaN comparisons for integers are not implemented");
313 return predicate;
314} 295}
315 296
316Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, 297Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
317 Node op_b) { 298 Node op_b) {
318 static constexpr std::array comparison_table{ 299 static constexpr std::array comparison_table{
319 std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan}, 300 std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
320 std::pair{PredCondition::Equal, OperationCode::Logical2HEqual}, 301 std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
321 std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, 302 std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
322 std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, 303 std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
323 std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, 304 std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
324 std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, 305 std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
325 std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, 306 std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
326 std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, 307 std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
327 std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, 308 std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
328 std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, 309 std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
329 std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}, 310 std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
330 }; 311 };
331 312
332 const auto comparison = 313 const auto comparison =
@@ -397,7 +378,7 @@ void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc
397 if (!sets_cc) { 378 if (!sets_cc) {
398 return; 379 return;
399 } 380 }
400 Node zerop = Operation(OperationCode::LogicalFEqual, std::move(value), Immediate(0.0f)); 381 Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
401 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); 382 SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop));
402 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); 383 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
403} 384}
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d6efc34b2..45e3ddd2c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
14#include <unordered_map> 14#include <unordered_map>
15#include <vector> 15#include <vector>
16 16
17#include <boost/container/small_vector.hpp>
17#include <boost/icl/interval_map.hpp> 18#include <boost/icl/interval_map.hpp>
18#include <boost/range/iterator_range.hpp> 19#include <boost/range/iterator_range.hpp>
19 20
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
53 54
54template <typename TSurface, typename TView> 55template <typename TSurface, typename TView>
55class TextureCache { 56class TextureCache {
57 using VectorSurface = boost::container::small_vector<TSurface, 1>;
56 58
57public: 59public:
58 void InvalidateRegion(VAddr addr, std::size_t size) { 60 void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -308,18 +310,20 @@ public:
308 dst_surface.first->MarkAsModified(true, Tick()); 310 dst_surface.first->MarkAsModified(true, Tick());
309 } 311 }
310 312
311 TSurface TryFindFramebufferSurface(VAddr addr) { 313 TSurface TryFindFramebufferSurface(VAddr addr) const {
312 if (!addr) { 314 if (!addr) {
313 return nullptr; 315 return nullptr;
314 } 316 }
315 const VAddr page = addr >> registry_page_bits; 317 const VAddr page = addr >> registry_page_bits;
316 std::vector<TSurface>& list = registry[page]; 318 const auto it = registry.find(page);
317 for (auto& surface : list) { 319 if (it == registry.end()) {
318 if (surface->GetCpuAddr() == addr) { 320 return nullptr;
319 return surface;
320 }
321 } 321 }
322 return nullptr; 322 const auto& list = it->second;
323 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
324 return surface->GetCpuAddr() == addr;
325 });
326 return found != list.end() ? *found : nullptr;
323 } 327 }
324 328
325 u64 Tick() { 329 u64 Tick() {
@@ -498,7 +502,7 @@ private:
498 * @param untopological Indicates to the recycler that the texture has no way 502 * @param untopological Indicates to the recycler that the texture has no way
499 * to match the overlaps due to topological reasons. 503 * to match the overlaps due to topological reasons.
500 **/ 504 **/
501 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, 505 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
502 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 506 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
503 if (Settings::IsGPULevelExtreme()) { 507 if (Settings::IsGPULevelExtreme()) {
504 return RecycleStrategy::Flush; 508 return RecycleStrategy::Flush;
@@ -538,9 +542,8 @@ private:
538 * @param untopological Indicates to the recycler that the texture has no way to match the 542 * @param untopological Indicates to the recycler that the texture has no way to match the
539 * overlaps due to topological reasons. 543 * overlaps due to topological reasons.
540 **/ 544 **/
541 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 545 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
542 const SurfaceParams& params, const GPUVAddr gpu_addr, 546 const GPUVAddr gpu_addr, const bool preserve_contents,
543 const bool preserve_contents,
544 const MatchTopologyResult untopological) { 547 const MatchTopologyResult untopological) {
545 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); 548 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
546 for (auto& surface : overlaps) { 549 for (auto& surface : overlaps) {
@@ -650,7 +653,7 @@ private:
650 * @param params The parameters on the new surface. 653 * @param params The parameters on the new surface.
651 * @param gpu_addr The starting address of the new surface. 654 * @param gpu_addr The starting address of the new surface.
652 **/ 655 **/
653 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, 656 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
654 const SurfaceParams& params, 657 const SurfaceParams& params,
655 const GPUVAddr gpu_addr) { 658 const GPUVAddr gpu_addr) {
656 if (params.target == SurfaceTarget::Texture3D) { 659 if (params.target == SurfaceTarget::Texture3D) {
@@ -708,7 +711,7 @@ private:
708 * @param preserve_contents Indicates that the new surface should be loaded from memory or 711 * @param preserve_contents Indicates that the new surface should be loaded from memory or
709 * left blank. 712 * left blank.
710 */ 713 */
711 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 714 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
712 const SurfaceParams& params, 715 const SurfaceParams& params,
713 const GPUVAddr gpu_addr, 716 const GPUVAddr gpu_addr,
714 const VAddr cpu_addr, 717 const VAddr cpu_addr,
@@ -810,7 +813,7 @@ private:
810 TSurface& current_surface = iter->second; 813 TSurface& current_surface = iter->second;
811 const auto topological_result = current_surface->MatchesTopology(params); 814 const auto topological_result = current_surface->MatchesTopology(params);
812 if (topological_result != MatchTopologyResult::FullMatch) { 815 if (topological_result != MatchTopologyResult::FullMatch) {
813 std::vector<TSurface> overlaps{current_surface}; 816 VectorSurface overlaps{current_surface};
814 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 817 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
815 topological_result); 818 topological_result);
816 } 819 }
@@ -991,7 +994,9 @@ private:
991 params.target = target; 994 params.target = target;
992 params.is_tiled = false; 995 params.is_tiled = false;
993 params.srgb_conversion = false; 996 params.srgb_conversion = false;
994 params.is_layered = false; 997 params.is_layered =
998 target == SurfaceTarget::Texture1DArray || target == SurfaceTarget::Texture2DArray ||
999 target == SurfaceTarget::TextureCubemap || target == SurfaceTarget::TextureCubeArray;
995 params.block_width = 0; 1000 params.block_width = 0;
996 params.block_height = 0; 1001 params.block_height = 0;
997 params.block_depth = 0; 1002 params.block_depth = 0;
@@ -1124,23 +1129,25 @@ private:
1124 } 1129 }
1125 } 1130 }
1126 1131
1127 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { 1132 VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1128 if (size == 0) { 1133 if (size == 0) {
1129 return {}; 1134 return {};
1130 } 1135 }
1131 const VAddr cpu_addr_end = cpu_addr + size; 1136 const VAddr cpu_addr_end = cpu_addr + size;
1132 VAddr start = cpu_addr >> registry_page_bits;
1133 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 1137 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1134 std::vector<TSurface> surfaces; 1138 VectorSurface surfaces;
1135 while (start <= end) { 1139 for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1136 std::vector<TSurface>& list = registry[start]; 1140 const auto it = registry.find(start);
1137 for (auto& surface : list) { 1141 if (it == registry.end()) {
1138 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { 1142 continue;
1139 surface->MarkAsPicked(true); 1143 }
1140 surfaces.push_back(surface); 1144 for (auto& surface : it->second) {
1145 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
1146 continue;
1141 } 1147 }
1148 surface->MarkAsPicked(true);
1149 surfaces.push_back(surface);
1142 } 1150 }
1143 start++;
1144 } 1151 }
1145 for (auto& surface : surfaces) { 1152 for (auto& surface : surfaces) {
1146 surface->MarkAsPicked(false); 1153 surface->MarkAsPicked(false);
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 3d759f77b..1f5e43043 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -106,6 +106,9 @@ public:
106 format.setVersion(4, 3); 106 format.setVersion(4, 3);
107 format.setProfile(QSurfaceFormat::CompatibilityProfile); 107 format.setProfile(QSurfaceFormat::CompatibilityProfile);
108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions); 108 format.setOption(QSurfaceFormat::FormatOption::DeprecatedFunctions);
109 if (Settings::values.renderer_debug) {
110 format.setOption(QSurfaceFormat::FormatOption::DebugContext);
111 }
109 // TODO: expose a setting for buffer value (ie default/single/double/triple) 112 // TODO: expose a setting for buffer value (ie default/single/double/triple)
110 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); 113 format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);
111 format.setSwapInterval(0); 114 format.setSwapInterval(0);
@@ -150,18 +153,19 @@ public:
150 } 153 }
151 154
152 void MakeCurrent() override { 155 void MakeCurrent() override {
153 if (is_current) { 156 // We can't track the current state of the underlying context in this wrapper class because
154 return; 157 // Qt may make the underlying context not current for one reason or another. In particular,
158 // the WebBrowser uses GL, so it seems to conflict if we aren't careful.
159 // Instead of always just making the context current (which does not have any caching to
160 // check if the underlying context is already current) we can check for the current context
161 // in the thread local data by calling `currentContext()` and checking if its ours.
162 if (QOpenGLContext::currentContext() != context.get()) {
163 context->makeCurrent(surface);
155 } 164 }
156 is_current = context->makeCurrent(surface);
157 } 165 }
158 166
159 void DoneCurrent() override { 167 void DoneCurrent() override {
160 if (!is_current) {
161 return;
162 }
163 context->doneCurrent(); 168 context->doneCurrent();
164 is_current = false;
165 } 169 }
166 170
167 QOpenGLContext* GetShareContext() { 171 QOpenGLContext* GetShareContext() {
@@ -178,7 +182,6 @@ private:
178 std::unique_ptr<QOpenGLContext> context; 182 std::unique_ptr<QOpenGLContext> context;
179 std::unique_ptr<QOffscreenSurface> offscreen_surface{}; 183 std::unique_ptr<QOffscreenSurface> offscreen_surface{};
180 QSurface* surface; 184 QSurface* surface;
181 bool is_current = false;
182}; 185};
183 186
184class DummyContext : public Core::Frontend::GraphicsContext {}; 187class DummyContext : public Core::Frontend::GraphicsContext {};
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 75c6cf20b..b08b87426 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -643,6 +643,8 @@ void Config::ReadRendererValues() {
643 Settings::values.use_asynchronous_gpu_emulation = 643 Settings::values.use_asynchronous_gpu_emulation =
644 ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); 644 ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
645 Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); 645 Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
646 Settings::values.use_assembly_shaders =
647 ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool();
646 Settings::values.use_fast_gpu_time = 648 Settings::values.use_fast_gpu_time =
647 ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool(); 649 ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool();
648 Settings::values.force_30fps_mode = 650 Settings::values.force_30fps_mode =
@@ -687,6 +689,8 @@ void Config::ReadSystemValues() {
687 689
688 Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt(); 690 Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt();
689 691
692 Settings::values.time_zone_index = ReadSetting(QStringLiteral("time_zone_index"), 0).toInt();
693
690 const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool(); 694 const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool();
691 if (rng_seed_enabled) { 695 if (rng_seed_enabled) {
692 Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong(); 696 Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong();
@@ -1088,6 +1092,8 @@ void Config::SaveRendererValues() {
1088 WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), 1092 WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
1089 Settings::values.use_asynchronous_gpu_emulation, false); 1093 Settings::values.use_asynchronous_gpu_emulation, false);
1090 WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); 1094 WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
1095 WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders,
1096 false);
1091 WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); 1097 WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true);
1092 WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false); 1098 WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);
1093 1099
@@ -1126,6 +1132,7 @@ void Config::SaveSystemValues() {
1126 WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0); 1132 WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0);
1127 WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1); 1133 WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1);
1128 WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1); 1134 WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1);
1135 WriteSetting(QStringLiteral("time_zone_index"), Settings::values.time_zone_index, 0);
1129 1136
1130 WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false); 1137 WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false);
1131 WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0); 1138 WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 5bb2ae555..37aadf7f8 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -12,6 +12,9 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
12 12
13 ui->setupUi(this); 13 ui->setupUi(this);
14 14
15 // TODO: Remove this after assembly shaders are fully integrated
16 ui->use_assembly_shaders->setVisible(false);
17
15 SetConfiguration(); 18 SetConfiguration();
16} 19}
17 20
@@ -22,6 +25,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
22 ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); 25 ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
23 ui->use_vsync->setEnabled(runtime_lock); 26 ui->use_vsync->setEnabled(runtime_lock);
24 ui->use_vsync->setChecked(Settings::values.use_vsync); 27 ui->use_vsync->setChecked(Settings::values.use_vsync);
28 ui->use_assembly_shaders->setEnabled(runtime_lock);
29 ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders);
25 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time); 30 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time);
26 ui->force_30fps_mode->setEnabled(runtime_lock); 31 ui->force_30fps_mode->setEnabled(runtime_lock);
27 ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode); 32 ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
@@ -33,6 +38,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
33 auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); 38 auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
34 Settings::values.gpu_accuracy = gpu_accuracy; 39 Settings::values.gpu_accuracy = gpu_accuracy;
35 Settings::values.use_vsync = ui->use_vsync->isChecked(); 40 Settings::values.use_vsync = ui->use_vsync->isChecked();
41 Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked();
36 Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked(); 42 Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked();
37 Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); 43 Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
38 Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); 44 Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 770b80c50..0021607ac 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -63,6 +63,16 @@
63 </widget> 63 </widget>
64 </item> 64 </item>
65 <item> 65 <item>
66 <widget class="QCheckBox" name="use_assembly_shaders">
67 <property name="toolTip">
68 <string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string>
69 </property>
70 <property name="text">
71 <string>Use assembly shaders (experimental, Nvidia OpenGL only)</string>
72 </property>
73 </widget>
74 </item>
75 <item>
66 <widget class="QCheckBox" name="force_30fps_mode"> 76 <widget class="QCheckBox" name="force_30fps_mode">
67 <property name="text"> 77 <property name="text">
68 <string>Force 30 FPS mode</string> 78 <string>Force 30 FPS mode</string>
diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp
index f49cd4c8f..10315e7a6 100644
--- a/src/yuzu/configuration/configure_system.cpp
+++ b/src/yuzu/configuration/configure_system.cpp
@@ -57,6 +57,7 @@ void ConfigureSystem::SetConfiguration() {
57 57
58 ui->combo_language->setCurrentIndex(Settings::values.language_index); 58 ui->combo_language->setCurrentIndex(Settings::values.language_index);
59 ui->combo_region->setCurrentIndex(Settings::values.region_index); 59 ui->combo_region->setCurrentIndex(Settings::values.region_index);
60 ui->combo_time_zone->setCurrentIndex(Settings::values.time_zone_index);
60 ui->combo_sound->setCurrentIndex(Settings::values.sound_index); 61 ui->combo_sound->setCurrentIndex(Settings::values.sound_index);
61 62
62 ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); 63 ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value());
@@ -84,6 +85,7 @@ void ConfigureSystem::ApplyConfiguration() {
84 85
85 Settings::values.language_index = ui->combo_language->currentIndex(); 86 Settings::values.language_index = ui->combo_language->currentIndex();
86 Settings::values.region_index = ui->combo_region->currentIndex(); 87 Settings::values.region_index = ui->combo_region->currentIndex();
88 Settings::values.time_zone_index = ui->combo_time_zone->currentIndex();
87 Settings::values.sound_index = ui->combo_sound->currentIndex(); 89 Settings::values.sound_index = ui->combo_sound->currentIndex();
88 90
89 if (ui->rng_seed_checkbox->isChecked()) { 91 if (ui->rng_seed_checkbox->isChecked()) {
diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h
index d8fa2d2cc..26d42d5c5 100644
--- a/src/yuzu/configuration/configure_system.h
+++ b/src/yuzu/configuration/configure_system.h
@@ -37,5 +37,6 @@ private:
37 37
38 int language_index = 0; 38 int language_index = 0;
39 int region_index = 0; 39 int region_index = 0;
40 int time_zone_index = 0;
40 int sound_index = 0; 41 int sound_index = 0;
41}; 42};
diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui
index 4e2c7e76e..9c8cca6dc 100644
--- a/src/yuzu/configuration/configure_system.ui
+++ b/src/yuzu/configuration/configure_system.ui
@@ -22,14 +22,14 @@
22 <string>System Settings</string> 22 <string>System Settings</string>
23 </property> 23 </property>
24 <layout class="QGridLayout" name="gridLayout"> 24 <layout class="QGridLayout" name="gridLayout">
25 <item row="2" column="0"> 25 <item row="3" column="0">
26 <widget class="QLabel" name="label_sound"> 26 <widget class="QLabel" name="label_sound">
27 <property name="text"> 27 <property name="text">
28 <string>Sound output mode</string> 28 <string>Sound output mode</string>
29 </property> 29 </property>
30 </widget> 30 </widget>
31 </item> 31 </item>
32 <item row="3" column="0"> 32 <item row="4" column="0">
33 <widget class="QLabel" name="label_console_id"> 33 <widget class="QLabel" name="label_console_id">
34 <property name="text"> 34 <property name="text">
35 <string>Console ID:</string> 35 <string>Console ID:</string>
@@ -174,14 +174,255 @@
174 </item> 174 </item>
175 </widget> 175 </widget>
176 </item> 176 </item>
177 <item row="5" column="0"> 177 <item row="2" column="0">
178 <widget class="QLabel" name="label_timezone">
179 <property name="text">
180 <string>Time Zone:</string>
181 </property>
182 </widget>
183 </item>
184 <item row="2" column="1">
185 <widget class="QComboBox" name="combo_time_zone">
186 <item>
187 <property name="text">
188 <string>Auto</string>
189 </property>
190 </item>
191 <item>
192 <property name="text">
193 <string>Default</string>
194 </property>
195 </item>
196 <item>
197 <property name="text">
198 <string>CET</string>
199 </property>
200 </item>
201 <item>
202 <property name="text">
203 <string>CST6CDT</string>
204 </property>
205 </item>
206 <item>
207 <property name="text">
208 <string>Cuba</string>
209 </property>
210 </item>
211 <item>
212 <property name="text">
213 <string>EET</string>
214 </property>
215 </item>
216 <item>
217 <property name="text">
218 <string>Egypt</string>
219 </property>
220 </item>
221 <item>
222 <property name="text">
223 <string>Eire</string>
224 </property>
225 </item>
226 <item>
227 <property name="text">
228 <string>EST</string>
229 </property>
230 </item>
231 <item>
232 <property name="text">
233 <string>EST5EDT</string>
234 </property>
235 </item>
236 <item>
237 <property name="text">
238 <string>GB</string>
239 </property>
240 </item>
241 <item>
242 <property name="text">
243 <string>GB-Eire</string>
244 </property>
245 </item>
246 <item>
247 <property name="text">
248 <string>GMT</string>
249 </property>
250 </item>
251 <item>
252 <property name="text">
253 <string>GMT+0</string>
254 </property>
255 </item>
256 <item>
257 <property name="text">
258 <string>GMT-0</string>
259 </property>
260 </item>
261 <item>
262 <property name="text">
263 <string>GMT0</string>
264 </property>
265 </item>
266 <item>
267 <property name="text">
268 <string>Greenwich</string>
269 </property>
270 </item>
271 <item>
272 <property name="text">
273 <string>Hongkong</string>
274 </property>
275 </item>
276 <item>
277 <property name="text">
278 <string>HST</string>
279 </property>
280 </item>
281 <item>
282 <property name="text">
283 <string>Iceland</string>
284 </property>
285 </item>
286 <item>
287 <property name="text">
288 <string>Iran</string>
289 </property>
290 </item>
291 <item>
292 <property name="text">
293 <string>Israel</string>
294 </property>
295 </item>
296 <item>
297 <property name="text">
298 <string>Jamaica</string>
299 </property>
300 </item>
301 <item>
302 <property name="text">
303 <string>Japan</string>
304 </property>
305 </item>
306 <item>
307 <property name="text">
308 <string>Kwajalein</string>
309 </property>
310 </item>
311 <item>
312 <property name="text">
313 <string>Libya</string>
314 </property>
315 </item>
316 <item>
317 <property name="text">
318 <string>MET</string>
319 </property>
320 </item>
321 <item>
322 <property name="text">
323 <string>MST</string>
324 </property>
325 </item>
326 <item>
327 <property name="text">
328 <string>MST7MDT</string>
329 </property>
330 </item>
331 <item>
332 <property name="text">
333 <string>Navajo</string>
334 </property>
335 </item>
336 <item>
337 <property name="text">
338 <string>NZ</string>
339 </property>
340 </item>
341 <item>
342 <property name="text">
343 <string>NZ-CHAT</string>
344 </property>
345 </item>
346 <item>
347 <property name="text">
348 <string>Poland</string>
349 </property>
350 </item>
351 <item>
352 <property name="text">
353 <string>Portugal</string>
354 </property>
355 </item>
356 <item>
357 <property name="text">
358 <string>PRC</string>
359 </property>
360 </item>
361 <item>
362 <property name="text">
363 <string>PST8PDT</string>
364 </property>
365 </item>
366 <item>
367 <property name="text">
368 <string>ROC</string>
369 </property>
370 </item>
371 <item>
372 <property name="text">
373 <string>ROK</string>
374 </property>
375 </item>
376 <item>
377 <property name="text">
378 <string>Singapore</string>
379 </property>
380 </item>
381 <item>
382 <property name="text">
383 <string>Turkey</string>
384 </property>
385 </item>
386 <item>
387 <property name="text">
388 <string>UCT</string>
389 </property>
390 </item>
391 <item>
392 <property name="text">
393 <string>Universal</string>
394 </property>
395 </item>
396 <item>
397 <property name="text">
398 <string>UTC</string>
399 </property>
400 </item>
401 <item>
402 <property name="text">
403 <string>W-SU</string>
404 </property>
405 </item>
406 <item>
407 <property name="text">
408 <string>WET</string>
409 </property>
410 </item>
411 <item>
412 <property name="text">
413 <string>Zulu</string>
414 </property>
415 </item>
416 </widget>
417 </item>
418 <item row="6" column="0">
178 <widget class="QCheckBox" name="rng_seed_checkbox"> 419 <widget class="QCheckBox" name="rng_seed_checkbox">
179 <property name="text"> 420 <property name="text">
180 <string>RNG Seed</string> 421 <string>RNG Seed</string>
181 </property> 422 </property>
182 </widget> 423 </widget>
183 </item> 424 </item>
184 <item row="2" column="1"> 425 <item row="3" column="1">
185 <widget class="QComboBox" name="combo_sound"> 426 <widget class="QComboBox" name="combo_sound">
186 <item> 427 <item>
187 <property name="text"> 428 <property name="text">
@@ -207,7 +448,7 @@
207 </property> 448 </property>
208 </widget> 449 </widget>
209 </item> 450 </item>
210 <item row="3" column="1"> 451 <item row="4" column="1">
211 <widget class="QPushButton" name="button_regenerate_console_id"> 452 <widget class="QPushButton" name="button_regenerate_console_id">
212 <property name="sizePolicy"> 453 <property name="sizePolicy">
213 <sizepolicy hsizetype="Fixed" vsizetype="Fixed"> 454 <sizepolicy hsizetype="Fixed" vsizetype="Fixed">
@@ -223,14 +464,14 @@
223 </property> 464 </property>
224 </widget> 465 </widget>
225 </item> 466 </item>
226 <item row="4" column="0"> 467 <item row="5" column="0">
227 <widget class="QCheckBox" name="custom_rtc_checkbox"> 468 <widget class="QCheckBox" name="custom_rtc_checkbox">
228 <property name="text"> 469 <property name="text">
229 <string>Custom RTC</string> 470 <string>Custom RTC</string>
230 </property> 471 </property>
231 </widget> 472 </widget>
232 </item> 473 </item>
233 <item row="4" column="1"> 474 <item row="5" column="1">
234 <widget class="QDateTimeEdit" name="custom_rtc_edit"> 475 <widget class="QDateTimeEdit" name="custom_rtc_edit">
235 <property name="minimumDate"> 476 <property name="minimumDate">
236 <date> 477 <date>
@@ -244,7 +485,7 @@
244 </property> 485 </property>
245 </widget> 486 </widget>
246 </item> 487 </item>
247 <item row="5" column="1"> 488 <item row="6" column="1">
248 <widget class="QLineEdit" name="rng_seed_edit"> 489 <widget class="QLineEdit" name="rng_seed_edit">
249 <property name="sizePolicy"> 490 <property name="sizePolicy">
250 <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> 491 <sizepolicy hsizetype="Minimum" vsizetype="Fixed">
diff --git a/src/yuzu/discord_impl.cpp b/src/yuzu/discord_impl.cpp
index ea0079353..a93733b26 100644
--- a/src/yuzu/discord_impl.cpp
+++ b/src/yuzu/discord_impl.cpp
@@ -18,7 +18,7 @@ DiscordImpl::DiscordImpl() {
18 18
19 // The number is the client ID for yuzu, it's used for images and the 19 // The number is the client ID for yuzu, it's used for images and the
20 // application name 20 // application name
21 Discord_Initialize("471872241299226636", &handlers, 1, nullptr); 21 Discord_Initialize("712465656758665259", &handlers, 1, nullptr);
22} 22}
23 23
24DiscordImpl::~DiscordImpl() { 24DiscordImpl::~DiscordImpl() {
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index dccbabcbf..bfb600df0 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -488,11 +488,11 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, std::string pat
488 auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id); 488 auto it = FindMatchingCompatibilityEntry(compatibility_list, program_id);
489 navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0); 489 navigate_to_gamedb_entry->setVisible(it != compatibility_list.end() && program_id != 0);
490 490
491 connect(open_save_location, &QAction::triggered, [this, program_id]() { 491 connect(open_save_location, &QAction::triggered, [this, program_id, path]() {
492 emit OpenFolderRequested(program_id, GameListOpenTarget::SaveData); 492 emit OpenFolderRequested(GameListOpenTarget::SaveData, path);
493 }); 493 });
494 connect(open_lfs_location, &QAction::triggered, [this, program_id]() { 494 connect(open_lfs_location, &QAction::triggered, [this, program_id, path]() {
495 emit OpenFolderRequested(program_id, GameListOpenTarget::ModData); 495 emit OpenFolderRequested(GameListOpenTarget::ModData, path);
496 }); 496 });
497 connect(open_transferable_shader_cache, &QAction::triggered, 497 connect(open_transferable_shader_cache, &QAction::triggered,
498 [this, program_id]() { emit OpenTransferableShaderCacheRequested(program_id); }); 498 [this, program_id]() { emit OpenTransferableShaderCacheRequested(program_id); });
diff --git a/src/yuzu/game_list.h b/src/yuzu/game_list.h
index 878d94413..a38cb2fc3 100644
--- a/src/yuzu/game_list.h
+++ b/src/yuzu/game_list.h
@@ -73,7 +73,7 @@ public:
73signals: 73signals:
74 void GameChosen(QString game_path); 74 void GameChosen(QString game_path);
75 void ShouldCancelWorker(); 75 void ShouldCancelWorker();
76 void OpenFolderRequested(u64 program_id, GameListOpenTarget target); 76 void OpenFolderRequested(GameListOpenTarget target, const std::string& game_path);
77 void OpenTransferableShaderCacheRequested(u64 program_id); 77 void OpenTransferableShaderCacheRequested(u64 program_id);
78 void DumpRomFSRequested(u64 program_id, const std::string& game_path); 78 void DumpRomFSRequested(u64 program_id, const std::string& game_path);
79 void CopyTIDRequested(u64 program_id); 79 void CopyTIDRequested(u64 program_id);
diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp
index 2a6483370..ae842306c 100644
--- a/src/yuzu/loading_screen.cpp
+++ b/src/yuzu/loading_screen.cpp
@@ -19,6 +19,7 @@
19#include <QTime> 19#include <QTime>
20#include <QtConcurrent/QtConcurrentRun> 20#include <QtConcurrent/QtConcurrentRun>
21#include "common/logging/log.h" 21#include "common/logging/log.h"
22#include "core/frontend/framebuffer_layout.h"
22#include "core/loader/loader.h" 23#include "core/loader/loader.h"
23#include "ui_loading_screen.h" 24#include "ui_loading_screen.h"
24#include "video_core/rasterizer_interface.h" 25#include "video_core/rasterizer_interface.h"
@@ -61,7 +62,7 @@ LoadingScreen::LoadingScreen(QWidget* parent)
61 : QWidget(parent), ui(std::make_unique<Ui::LoadingScreen>()), 62 : QWidget(parent), ui(std::make_unique<Ui::LoadingScreen>()),
62 previous_stage(VideoCore::LoadCallbackStage::Complete) { 63 previous_stage(VideoCore::LoadCallbackStage::Complete) {
63 ui->setupUi(this); 64 ui->setupUi(this);
64 setMinimumSize(1280, 720); 65 setMinimumSize(Layout::MinimumSize::Width, Layout::MinimumSize::Height);
65 66
66 // Create a fade out effect to hide this loading screen widget. 67 // Create a fade out effect to hide this loading screen widget.
67 // When fading opacity, it will fade to the parent widgets background color, which is why we 68 // When fading opacity, it will fade to the parent widgets background color, which is why we
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 86e8a1d49..270cccc77 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -65,6 +65,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
65#include "common/logging/backend.h" 65#include "common/logging/backend.h"
66#include "common/logging/filter.h" 66#include "common/logging/filter.h"
67#include "common/logging/log.h" 67#include "common/logging/log.h"
68#include "common/memory_detect.h"
68#include "common/microprofile.h" 69#include "common/microprofile.h"
69#include "common/scm_rev.h" 70#include "common/scm_rev.h"
70#include "common/scope_exit.h" 71#include "common/scope_exit.h"
@@ -219,6 +220,10 @@ GMainWindow::GMainWindow()
219 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string); 220 LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
220#endif 221#endif
221 LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString()); 222 LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());
223 LOG_INFO(Frontend, "Host RAM: {:.2f} GB",
224 Common::GetMemInfo().TotalPhysicalMemory / 1024.0f / 1024 / 1024);
225 LOG_INFO(Frontend, "Host Swap: {:.2f} GB",
226 Common::GetMemInfo().TotalSwapMemory / 1024.0f / 1024 / 1024);
222 UpdateWindowTitle(); 227 UpdateWindowTitle();
223 228
224 show(); 229 show();
@@ -724,13 +729,13 @@ void GMainWindow::InitializeHotkeys() {
724} 729}
725 730
726void GMainWindow::SetDefaultUIGeometry() { 731void GMainWindow::SetDefaultUIGeometry() {
727 // geometry: 55% of the window contents are in the upper screen half, 45% in the lower half 732 // geometry: 53% of the window contents are in the upper screen half, 47% in the lower half
728 const QRect screenRect = QApplication::desktop()->screenGeometry(this); 733 const QRect screenRect = QApplication::desktop()->screenGeometry(this);
729 734
730 const int w = screenRect.width() * 2 / 3; 735 const int w = screenRect.width() * 2 / 3;
731 const int h = screenRect.height() / 2; 736 const int h = screenRect.height() * 2 / 3;
732 const int x = (screenRect.x() + screenRect.width()) / 2 - w / 2; 737 const int x = (screenRect.x() + screenRect.width()) / 2 - w / 2;
733 const int y = (screenRect.y() + screenRect.height()) / 2 - h * 55 / 100; 738 const int y = (screenRect.y() + screenRect.height()) / 2 - h * 53 / 100;
734 739
735 setGeometry(x, y, w, h); 740 setGeometry(x, y, w, h);
736} 741}
@@ -831,6 +836,7 @@ void GMainWindow::ConnectMenuEvents() {
831 &GMainWindow::OnDisplayTitleBars); 836 &GMainWindow::OnDisplayTitleBars);
832 connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar); 837 connect(ui.action_Show_Filter_Bar, &QAction::triggered, this, &GMainWindow::OnToggleFilterBar);
833 connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible); 838 connect(ui.action_Show_Status_Bar, &QAction::triggered, statusBar(), &QStatusBar::setVisible);
839 connect(ui.action_Reset_Window_Size, &QAction::triggered, this, &GMainWindow::ResetWindowSize);
834 840
835 // Fullscreen 841 // Fullscreen
836 ui.action_Fullscreen->setShortcut( 842 ui.action_Fullscreen->setShortcut(
@@ -1154,39 +1160,61 @@ void GMainWindow::OnGameListLoadFile(QString game_path) {
1154 BootGame(game_path); 1160 BootGame(game_path);
1155} 1161}
1156 1162
1157void GMainWindow::OnGameListOpenFolder(u64 program_id, GameListOpenTarget target) { 1163void GMainWindow::OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path) {
1158 std::string path; 1164 std::string path;
1159 QString open_target; 1165 QString open_target;
1166
1167 const auto v_file = Core::GetGameFileFromPath(vfs, game_path);
1168 const auto loader = Loader::GetLoader(v_file);
1169 FileSys::NACP control{};
1170 u64 program_id{};
1171
1172 loader->ReadControlData(control);
1173 loader->ReadProgramId(program_id);
1174
1175 const bool has_user_save{control.GetDefaultNormalSaveSize() > 0};
1176 const bool has_device_save{control.GetDeviceSaveDataSize() > 0};
1177
1178 ASSERT_MSG(has_user_save != has_device_save, "Game uses both user and device savedata?");
1179
1160 switch (target) { 1180 switch (target) {
1161 case GameListOpenTarget::SaveData: { 1181 case GameListOpenTarget::SaveData: {
1162 open_target = tr("Save Data"); 1182 open_target = tr("Save Data");
1163 const std::string nand_dir = FileUtil::GetUserPath(FileUtil::UserPath::NANDDir); 1183 const std::string nand_dir = FileUtil::GetUserPath(FileUtil::UserPath::NANDDir);
1164 ASSERT(program_id != 0); 1184 ASSERT(program_id != 0);
1165 1185
1166 const auto select_profile = [this] { 1186 if (has_user_save) {
1167 QtProfileSelectionDialog dialog(this); 1187 // User save data
1168 dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint | 1188 const auto select_profile = [this] {
1169 Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint); 1189 QtProfileSelectionDialog dialog(this);
1170 dialog.setWindowModality(Qt::WindowModal); 1190 dialog.setWindowFlags(Qt::Dialog | Qt::CustomizeWindowHint | Qt::WindowTitleHint |
1191 Qt::WindowSystemMenuHint | Qt::WindowCloseButtonHint);
1192 dialog.setWindowModality(Qt::WindowModal);
1171 1193
1172 if (dialog.exec() == QDialog::Rejected) { 1194 if (dialog.exec() == QDialog::Rejected) {
1173 return -1; 1195 return -1;
1174 } 1196 }
1175 1197
1176 return dialog.GetIndex(); 1198 return dialog.GetIndex();
1177 }; 1199 };
1178 1200
1179 const auto index = select_profile(); 1201 const auto index = select_profile();
1180 if (index == -1) { 1202 if (index == -1) {
1181 return; 1203 return;
1182 } 1204 }
1183 1205
1184 Service::Account::ProfileManager manager; 1206 Service::Account::ProfileManager manager;
1185 const auto user_id = manager.GetUser(static_cast<std::size_t>(index)); 1207 const auto user_id = manager.GetUser(static_cast<std::size_t>(index));
1186 ASSERT(user_id); 1208 ASSERT(user_id);
1187 path = nand_dir + FileSys::SaveDataFactory::GetFullPath(FileSys::SaveDataSpaceId::NandUser, 1209 path = nand_dir + FileSys::SaveDataFactory::GetFullPath(
1188 FileSys::SaveDataType::SaveData, 1210 FileSys::SaveDataSpaceId::NandUser,
1189 program_id, user_id->uuid, 0); 1211 FileSys::SaveDataType::SaveData, program_id, user_id->uuid, 0);
1212 } else {
1213 // Device save data
1214 path = nand_dir + FileSys::SaveDataFactory::GetFullPath(
1215 FileSys::SaveDataSpaceId::NandUser,
1216 FileSys::SaveDataType::SaveData, program_id, {}, 0);
1217 }
1190 1218
1191 if (!FileUtil::Exists(path)) { 1219 if (!FileUtil::Exists(path)) {
1192 FileUtil::CreateFullPath(path); 1220 FileUtil::CreateFullPath(path);
@@ -1829,6 +1857,20 @@ void GMainWindow::ToggleWindowMode() {
1829 } 1857 }
1830} 1858}
1831 1859
1860void GMainWindow::ResetWindowSize() {
1861 const auto aspect_ratio = Layout::EmulationAspectRatio(
1862 static_cast<Layout::AspectRatio>(Settings::values.aspect_ratio),
1863 static_cast<float>(Layout::ScreenUndocked::Height) / Layout::ScreenUndocked::Width);
1864 if (!ui.action_Single_Window_Mode->isChecked()) {
1865 render_window->resize(Layout::ScreenUndocked::Height / aspect_ratio,
1866 Layout::ScreenUndocked::Height);
1867 } else {
1868 resize(Layout::ScreenUndocked::Height / aspect_ratio,
1869 Layout::ScreenUndocked::Height + menuBar()->height() +
1870 (ui.action_Show_Status_Bar->isChecked() ? statusBar()->height() : 0));
1871 }
1872}
1873
1832void GMainWindow::OnConfigure() { 1874void GMainWindow::OnConfigure() {
1833 const auto old_theme = UISettings::values.theme; 1875 const auto old_theme = UISettings::values.theme;
1834 const bool old_discord_presence = UISettings::values.enable_discord_presence; 1876 const bool old_discord_presence = UISettings::values.enable_discord_presence;
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index 60b17c54a..4f4c8ddbe 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -183,7 +183,7 @@ private slots:
183 void OnMenuReportCompatibility(); 183 void OnMenuReportCompatibility();
184 /// Called whenever a user selects a game in the game list widget. 184 /// Called whenever a user selects a game in the game list widget.
185 void OnGameListLoadFile(QString game_path); 185 void OnGameListLoadFile(QString game_path);
186 void OnGameListOpenFolder(u64 program_id, GameListOpenTarget target); 186 void OnGameListOpenFolder(GameListOpenTarget target, const std::string& game_path);
187 void OnTransferableShaderCacheOpenFile(u64 program_id); 187 void OnTransferableShaderCacheOpenFile(u64 program_id);
188 void OnGameListDumpRomFS(u64 program_id, const std::string& game_path); 188 void OnGameListDumpRomFS(u64 program_id, const std::string& game_path);
189 void OnGameListCopyTID(u64 program_id); 189 void OnGameListCopyTID(u64 program_id);
@@ -208,6 +208,7 @@ private slots:
208 void ShowFullscreen(); 208 void ShowFullscreen();
209 void HideFullscreen(); 209 void HideFullscreen();
210 void ToggleWindowMode(); 210 void ToggleWindowMode();
211 void ResetWindowSize();
211 void OnCaptureScreenshot(); 212 void OnCaptureScreenshot();
212 void OnCoreError(Core::System::ResultStatus, std::string); 213 void OnCoreError(Core::System::ResultStatus, std::string);
213 void OnReinitializeKeys(ReinitializeKeyBehavior behavior); 214 void OnReinitializeKeys(ReinitializeKeyBehavior behavior);
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index ae414241e..97c90f50b 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -6,8 +6,8 @@
6 <rect> 6 <rect>
7 <x>0</x> 7 <x>0</x>
8 <y>0</y> 8 <y>0</y>
9 <width>1081</width> 9 <width>1280</width>
10 <height>730</height> 10 <height>720</height>
11 </rect> 11 </rect>
12 </property> 12 </property>
13 <property name="windowTitle"> 13 <property name="windowTitle">
@@ -44,7 +44,7 @@
44 <rect> 44 <rect>
45 <x>0</x> 45 <x>0</x>
46 <y>0</y> 46 <y>0</y>
47 <width>1081</width> 47 <width>1280</width>
48 <height>21</height> 48 <height>21</height>
49 </rect> 49 </rect>
50 </property> 50 </property>
@@ -96,6 +96,7 @@
96 <addaction name="action_Display_Dock_Widget_Headers"/> 96 <addaction name="action_Display_Dock_Widget_Headers"/>
97 <addaction name="action_Show_Filter_Bar"/> 97 <addaction name="action_Show_Filter_Bar"/>
98 <addaction name="action_Show_Status_Bar"/> 98 <addaction name="action_Show_Status_Bar"/>
99 <addaction name="action_Reset_Window_Size"/>
99 <addaction name="separator"/> 100 <addaction name="separator"/>
100 <addaction name="menu_View_Debugging"/> 101 <addaction name="menu_View_Debugging"/>
101 </widget> 102 </widget>
@@ -215,6 +216,11 @@
215 <string>Show Status Bar</string> 216 <string>Show Status Bar</string>
216 </property> 217 </property>
217 </action> 218 </action>
219 <action name="action_Reset_Window_Size">
220 <property name="text">
221 <string>Reset Window Size</string>
222 </property>
223 </action>
218 <action name="action_Fullscreen"> 224 <action name="action_Fullscreen">
219 <property name="checkable"> 225 <property name="checkable">
220 <bool>true</bool> 226 <bool>true</bool>
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 8476a5a16..c20d48c42 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -367,6 +367,9 @@ void Config::ReadValues() {
367 Settings::values.custom_rtc = std::nullopt; 367 Settings::values.custom_rtc = std::nullopt;
368 } 368 }
369 369
370 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
371 Settings::values.time_zone_index = sdl2_config->GetInteger("System", "time_zone_index", 0);
372
370 // Core 373 // Core
371 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false); 374 Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
372 375
@@ -394,6 +397,8 @@ void Config::ReadValues() {
394 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); 397 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
395 Settings::values.use_vsync = 398 Settings::values.use_vsync =
396 static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1)); 399 static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
400 Settings::values.use_assembly_shaders =
401 sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false);
397 Settings::values.use_fast_gpu_time = 402 Settings::values.use_fast_gpu_time =
398 sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true); 403 sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);
399 404
@@ -409,8 +414,6 @@ void Config::ReadValues() {
409 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); 414 Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
410 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1)); 415 Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1));
411 416
412 Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1);
413
414 // Miscellaneous 417 // Miscellaneous
415 Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace"); 418 Settings::values.log_filter = sdl2_config->Get("Miscellaneous", "log_filter", "*:Trace");
416 Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false); 419 Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 60b1a62fa..abc6e6e65 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -134,6 +134,10 @@ max_anisotropy =
134# 0 (default): Off, 1: On 134# 0 (default): Off, 1: On
135use_vsync = 135use_vsync =
136 136
137# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required.
138# 0 (default): Off, 1: On
139use_assembly_shaders =
140
137# Turns on the frame limiter, which will limit frames output to the target game speed 141# Turns on the frame limiter, which will limit frames output to the target game speed
138# 0: Off, 1: On (default) 142# 0: Off, 1: On (default)
139use_frame_limit = 143use_frame_limit =
@@ -262,6 +266,10 @@ language_index =
262# -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan 266# -1: Auto-select (default), 0: Japan, 1: USA, 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan
263region_value = 267region_value =
264 268
269# The system time zone that yuzu will use during emulation
270# 0: Auto-select (default), 1: Default (system archive value), Others: Index for specified time zone
271time_zone_index =
272
265[Miscellaneous] 273[Miscellaneous]
266# A filter which removes logs below a certain logging level. 274# A filter which removes logs below a certain logging level.
267# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical 275# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical
diff --git a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
index 411e7e647..09cc0a3b5 100644
--- a/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
+++ b/src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@@ -98,6 +98,9 @@ EmuWindow_SDL2_GL::EmuWindow_SDL2_GL(Core::System& system, bool fullscreen)
98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8); 98 SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0); 99 SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 0);
100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1); 100 SDL_GL_SetAttribute(SDL_GL_SHARE_WITH_CURRENT_CONTEXT, 1);
101 if (Settings::values.renderer_debug) {
102 SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_DEBUG_FLAG);
103 }
101 SDL_GL_SetSwapInterval(0); 104 SDL_GL_SetSwapInterval(0);
102 105
103 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname, 106 std::string window_title = fmt::format("yuzu {} | {}-{}", Common::g_build_fullname,