summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeModules/GenerateSCMRev.cmake6
-rw-r--r--src/common/CMakeLists.txt8
-rw-r--r--src/common/algorithm.h (renamed from src/common/binary_find.h)6
-rw-r--r--src/common/hash.h11
-rw-r--r--src/core/core.cpp29
-rw-r--r--src/core/hle/service/am/am.cpp5
-rw-r--r--src/core/hle/service/apm/controller.cpp50
-rw-r--r--src/core/hle/service/apm/controller.h2
-rw-r--r--src/core/hle/service/bcat/backend/backend.cpp4
-rw-r--r--src/core/hle/service/bcat/backend/backend.h12
-rw-r--r--src/core/hle/service/bcat/backend/boxcat.cpp29
-rw-r--r--src/core/hle/service/bcat/backend/boxcat.h10
-rw-r--r--src/core/hle/service/bcat/module.cpp24
-rw-r--r--src/core/hle/service/hid/controllers/npad.cpp64
-rw-r--r--src/core/hle/service/hid/controllers/npad.h10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp4
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp33
-rw-r--r--src/core/hle/service/nvdrv/interface.cpp4
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp4
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.cpp4
-rw-r--r--src/core/hle/service/nvflinger/buffer_queue.h6
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp12
-rw-r--r--src/core/memory/cheat_engine.cpp7
-rw-r--r--src/core/memory/dmnt_cheat_vm.cpp4
-rw-r--r--src/video_core/CMakeLists.txt7
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h119
-rw-r--r--src/video_core/engines/kepler_compute.cpp20
-rw-r--r--src/video_core/engines/kepler_compute.h14
-rw-r--r--src/video_core/engines/maxwell_3d.cpp69
-rw-r--r--src/video_core/engines/maxwell_3d.h16
-rw-r--r--src/video_core/engines/shader_bytecode.h26
-rw-r--r--src/video_core/gpu.cpp15
-rw-r--r--src/video_core/gpu.h6
-rw-r--r--src/video_core/gpu_asynch.cpp4
-rw-r--r--src/video_core/gpu_asynch.h1
-rw-r--r--src/video_core/gpu_synch.h1
-rw-r--r--src/video_core/gpu_thread.cpp19
-rw-r--r--src/video_core/gpu_thread.h9
-rw-r--r--src/video_core/macro_interpreter.cpp71
-rw-r--r--src/video_core/macro_interpreter.h80
-rw-r--r--src/video_core/morton.cpp22
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp10
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp533
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h77
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp142
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h9
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp449
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h44
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp86
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h36
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp13
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp54
-rw-r--r--src/video_core/shader/ast.cpp89
-rw-r--r--src/video_core/shader/ast.h6
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp110
-rw-r--r--src/video_core/shader/const_buffer_locker.h80
-rw-r--r--src/video_core/shader/control_flow.cpp383
-rw-r--r--src/video_core/shader/control_flow.h69
-rw-r--r--src/video_core/shader/decode.cpp41
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp6
-rw-r--r--src/video_core/shader/decode/image.cpp4
-rw-r--r--src/video_core/shader/decode/memory.cpp39
-rw-r--r--src/video_core/shader/decode/other.cpp2
-rw-r--r--src/video_core/shader/decode/shift.cpp2
-rw-r--r--src/video_core/shader/decode/texture.cpp78
-rw-r--r--src/video_core/shader/decode/video.cpp2
-rw-r--r--src/video_core/shader/decode/warp.cpp7
-rw-r--r--src/video_core/shader/expr.h21
-rw-r--r--src/video_core/shader/node.h2
-rw-r--r--src/video_core/shader/shader_ir.cpp127
-rw-r--r--src/video_core/shader/shader_ir.h31
-rw-r--r--src/video_core/surface.cpp34
-rw-r--r--src/video_core/surface.h222
-rw-r--r--src/video_core/texture_cache/surface_base.cpp1
-rw-r--r--src/video_core/texture_cache/surface_base.h5
-rw-r--r--src/video_core/texture_cache/texture_cache.h139
-rw-r--r--src/video_core/textures/astc.cpp15
79 files changed, 2346 insertions, 1477 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 09eabe2c7..21e03ae98 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -85,10 +85,12 @@ set(HASH_FILES
85 "${VIDEO_CORE}/shader/decode/xmad.cpp" 85 "${VIDEO_CORE}/shader/decode/xmad.cpp"
86 "${VIDEO_CORE}/shader/ast.cpp" 86 "${VIDEO_CORE}/shader/ast.cpp"
87 "${VIDEO_CORE}/shader/ast.h" 87 "${VIDEO_CORE}/shader/ast.h"
88 "${VIDEO_CORE}/shader/control_flow.cpp"
89 "${VIDEO_CORE}/shader/control_flow.h"
90 "${VIDEO_CORE}/shader/compiler_settings.cpp" 88 "${VIDEO_CORE}/shader/compiler_settings.cpp"
91 "${VIDEO_CORE}/shader/compiler_settings.h" 89 "${VIDEO_CORE}/shader/compiler_settings.h"
90 "${VIDEO_CORE}/shader/const_buffer_locker.cpp"
91 "${VIDEO_CORE}/shader/const_buffer_locker.h"
92 "${VIDEO_CORE}/shader/control_flow.cpp"
93 "${VIDEO_CORE}/shader/control_flow.h"
92 "${VIDEO_CORE}/shader/decode.cpp" 94 "${VIDEO_CORE}/shader/decode.cpp"
93 "${VIDEO_CORE}/shader/expr.cpp" 95 "${VIDEO_CORE}/shader/expr.cpp"
94 "${VIDEO_CORE}/shader/expr.h" 96 "${VIDEO_CORE}/shader/expr.h"
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 906c486fd..9c6f1c07c 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -74,10 +74,12 @@ add_custom_command(OUTPUT scm_rev.cpp
74 "${VIDEO_CORE}/shader/decode/xmad.cpp" 74 "${VIDEO_CORE}/shader/decode/xmad.cpp"
75 "${VIDEO_CORE}/shader/ast.cpp" 75 "${VIDEO_CORE}/shader/ast.cpp"
76 "${VIDEO_CORE}/shader/ast.h" 76 "${VIDEO_CORE}/shader/ast.h"
77 "${VIDEO_CORE}/shader/control_flow.cpp"
78 "${VIDEO_CORE}/shader/control_flow.h"
79 "${VIDEO_CORE}/shader/compiler_settings.cpp" 77 "${VIDEO_CORE}/shader/compiler_settings.cpp"
80 "${VIDEO_CORE}/shader/compiler_settings.h" 78 "${VIDEO_CORE}/shader/compiler_settings.h"
79 "${VIDEO_CORE}/shader/const_buffer_locker.cpp"
80 "${VIDEO_CORE}/shader/const_buffer_locker.h"
81 "${VIDEO_CORE}/shader/control_flow.cpp"
82 "${VIDEO_CORE}/shader/control_flow.h"
81 "${VIDEO_CORE}/shader/decode.cpp" 83 "${VIDEO_CORE}/shader/decode.cpp"
82 "${VIDEO_CORE}/shader/expr.cpp" 84 "${VIDEO_CORE}/shader/expr.cpp"
83 "${VIDEO_CORE}/shader/expr.h" 85 "${VIDEO_CORE}/shader/expr.h"
@@ -95,11 +97,11 @@ add_custom_command(OUTPUT scm_rev.cpp
95) 97)
96 98
97add_library(common STATIC 99add_library(common STATIC
100 algorithm.h
98 alignment.h 101 alignment.h
99 assert.h 102 assert.h
100 detached_tasks.cpp 103 detached_tasks.cpp
101 detached_tasks.h 104 detached_tasks.h
102 binary_find.h
103 bit_field.h 105 bit_field.h
104 bit_util.h 106 bit_util.h
105 cityhash.cpp 107 cityhash.cpp
diff --git a/src/common/binary_find.h b/src/common/algorithm.h
index 5cc523bf9..e21b1373c 100644
--- a/src/common/binary_find.h
+++ b/src/common/algorithm.h
@@ -5,6 +5,12 @@
5#pragma once 5#pragma once
6 6
7#include <algorithm> 7#include <algorithm>
8#include <functional>
9
10// Algorithms that operate on iterators, much like the <algorithm> header.
11//
12// Note: If the algorithm is not general-purpose and/or doesn't operate on iterators,
13// it should probably not be placed within this header.
8 14
9namespace Common { 15namespace Common {
10 16
diff --git a/src/common/hash.h b/src/common/hash.h
index 40194d1ee..ebd4125e2 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -6,6 +6,8 @@
6 6
7#include <cstddef> 7#include <cstddef>
8#include <cstring> 8#include <cstring>
9#include <utility>
10#include <boost/functional/hash.hpp>
9#include "common/cityhash.h" 11#include "common/cityhash.h"
10#include "common/common_types.h" 12#include "common/common_types.h"
11 13
@@ -68,4 +70,13 @@ struct HashableStruct {
68 } 70 }
69}; 71};
70 72
73struct PairHash {
74 template <class T1, class T2>
75 std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept {
76 std::size_t seed = std::hash<T1>()(pair.first);
77 boost::hash_combine(seed, std::hash<T2>()(pair.second));
78 return seed;
79 }
80};
81
71} // namespace Common 82} // namespace Common
diff --git a/src/core/core.cpp b/src/core/core.cpp
index d79045eea..eba17218a 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -112,8 +112,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
112} 112}
113struct System::Impl { 113struct System::Impl {
114 explicit Impl(System& system) 114 explicit Impl(System& system)
115 : kernel{system}, fs_controller{system}, cpu_core_manager{system}, 115 : kernel{system}, fs_controller{system}, cpu_core_manager{system}, reporter{system},
116 applet_manager{system}, reporter{system} {} 116 applet_manager{system} {}
117 117
118 Cpu& CurrentCpuCore() { 118 Cpu& CurrentCpuCore() {
119 return cpu_core_manager.GetCurrentCore(); 119 return cpu_core_manager.GetCurrentCore();
@@ -240,22 +240,27 @@ struct System::Impl {
240 } 240 }
241 241
242 void Shutdown() { 242 void Shutdown() {
243 // Log last frame performance stats 243 // Log last frame performance stats if game was loaded
244 const auto perf_results = GetAndResetPerfStats(); 244 if (perf_stats) {
245 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", 245 const auto perf_results = GetAndResetPerfStats();
246 perf_results.emulation_speed * 100.0); 246 telemetry_session->AddField(Telemetry::FieldType::Performance,
247 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", 247 "Shutdown_EmulationSpeed",
248 perf_results.game_fps); 248 perf_results.emulation_speed * 100.0);
249 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", 249 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
250 perf_results.frametime * 1000.0); 250 perf_results.game_fps);
251 telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS", 251 telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
252 perf_stats->GetMeanFrametime()); 252 perf_results.frametime * 1000.0);
253 telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS",
254 perf_stats->GetMeanFrametime());
255 }
253 256
254 lm_manager.Flush(); 257 lm_manager.Flush();
255 258
256 is_powered_on = false; 259 is_powered_on = false;
257 exit_lock = false; 260 exit_lock = false;
258 261
262 gpu_core->WaitIdle();
263
259 // Shutdown emulation session 264 // Shutdown emulation session
260 renderer.reset(); 265 renderer.reset();
261 GDBStub::Shutdown(); 266 GDBStub::Shutdown();
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 941ebc93a..3a32d5b41 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -1140,8 +1140,9 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) {
1140 LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind)); 1140 LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind));
1141 1141
1142 if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) { 1142 if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) {
1143 const auto backend = BCAT::CreateBackendFromSettings( 1143 const auto backend = BCAT::CreateBackendFromSettings(system, [this](u64 tid) {
1144 [this](u64 tid) { return system.GetFileSystemController().GetBCATDirectory(tid); }); 1144 return system.GetFileSystemController().GetBCATDirectory(tid);
1145 });
1145 const auto build_id_full = system.GetCurrentProcessBuildID(); 1146 const auto build_id_full = system.GetCurrentProcessBuildID();
1146 u64 build_id{}; 1147 u64 build_id{};
1147 std::memcpy(&build_id, build_id_full.data(), sizeof(u64)); 1148 std::memcpy(&build_id, build_id_full.data(), sizeof(u64));
diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp
index 073d0f6fa..25a886238 100644
--- a/src/core/hle/service/apm/controller.cpp
+++ b/src/core/hle/service/apm/controller.cpp
@@ -2,6 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
7#include <utility>
8
5#include "common/logging/log.h" 9#include "common/logging/log.h"
6#include "core/core_timing.h" 10#include "core/core_timing.h"
7#include "core/hle/service/apm/controller.h" 11#include "core/hle/service/apm/controller.h"
@@ -9,8 +13,7 @@
9 13
10namespace Service::APM { 14namespace Service::APM {
11 15
12constexpr PerformanceConfiguration DEFAULT_PERFORMANCE_CONFIGURATION = 16constexpr auto DEFAULT_PERFORMANCE_CONFIGURATION = PerformanceConfiguration::Config7;
13 PerformanceConfiguration::Config7;
14 17
15Controller::Controller(Core::Timing::CoreTiming& core_timing) 18Controller::Controller(Core::Timing::CoreTiming& core_timing)
16 : core_timing{core_timing}, configs{ 19 : core_timing{core_timing}, configs{
@@ -22,18 +25,35 @@ Controller::~Controller() = default;
22 25
23void Controller::SetPerformanceConfiguration(PerformanceMode mode, 26void Controller::SetPerformanceConfiguration(PerformanceMode mode,
24 PerformanceConfiguration config) { 27 PerformanceConfiguration config) {
25 static const std::map<PerformanceConfiguration, u32> PCONFIG_TO_SPEED_MAP{ 28 static constexpr std::array<std::pair<PerformanceConfiguration, u32>, 16> config_to_speed{{
26 {PerformanceConfiguration::Config1, 1020}, {PerformanceConfiguration::Config2, 1020}, 29 {PerformanceConfiguration::Config1, 1020},
27 {PerformanceConfiguration::Config3, 1224}, {PerformanceConfiguration::Config4, 1020}, 30 {PerformanceConfiguration::Config2, 1020},
28 {PerformanceConfiguration::Config5, 1020}, {PerformanceConfiguration::Config6, 1224}, 31 {PerformanceConfiguration::Config3, 1224},
29 {PerformanceConfiguration::Config7, 1020}, {PerformanceConfiguration::Config8, 1020}, 32 {PerformanceConfiguration::Config4, 1020},
30 {PerformanceConfiguration::Config9, 1020}, {PerformanceConfiguration::Config10, 1020}, 33 {PerformanceConfiguration::Config5, 1020},
31 {PerformanceConfiguration::Config11, 1020}, {PerformanceConfiguration::Config12, 1020}, 34 {PerformanceConfiguration::Config6, 1224},
32 {PerformanceConfiguration::Config13, 1785}, {PerformanceConfiguration::Config14, 1785}, 35 {PerformanceConfiguration::Config7, 1020},
33 {PerformanceConfiguration::Config15, 1020}, {PerformanceConfiguration::Config16, 1020}, 36 {PerformanceConfiguration::Config8, 1020},
34 }; 37 {PerformanceConfiguration::Config9, 1020},
35 38 {PerformanceConfiguration::Config10, 1020},
36 SetClockSpeed(PCONFIG_TO_SPEED_MAP.find(config)->second); 39 {PerformanceConfiguration::Config11, 1020},
40 {PerformanceConfiguration::Config12, 1020},
41 {PerformanceConfiguration::Config13, 1785},
42 {PerformanceConfiguration::Config14, 1785},
43 {PerformanceConfiguration::Config15, 1020},
44 {PerformanceConfiguration::Config16, 1020},
45 }};
46
47 const auto iter = std::find_if(config_to_speed.cbegin(), config_to_speed.cend(),
48 [config](const auto& entry) { return entry.first == config; });
49
50 if (iter == config_to_speed.cend()) {
51 LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}",
52 static_cast<u32>(config));
53 return;
54 }
55
56 SetClockSpeed(iter->second);
37 configs.insert_or_assign(mode, config); 57 configs.insert_or_assign(mode, config);
38} 58}
39 59
@@ -48,7 +68,7 @@ void Controller::SetFromCpuBoostMode(CpuBoostMode mode) {
48 BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode))); 68 BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode)));
49} 69}
50 70
51PerformanceMode Controller::GetCurrentPerformanceMode() { 71PerformanceMode Controller::GetCurrentPerformanceMode() const {
52 return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld; 72 return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld;
53} 73}
54 74
diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/controller.h
index 454caa6eb..af0c4cd34 100644
--- a/src/core/hle/service/apm/controller.h
+++ b/src/core/hle/service/apm/controller.h
@@ -56,7 +56,7 @@ public:
56 void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config); 56 void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config);
57 void SetFromCpuBoostMode(CpuBoostMode mode); 57 void SetFromCpuBoostMode(CpuBoostMode mode);
58 58
59 PerformanceMode GetCurrentPerformanceMode(); 59 PerformanceMode GetCurrentPerformanceMode() const;
60 PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode); 60 PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode);
61 61
62private: 62private:
diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp
index 9d6946bc5..b86fda29a 100644
--- a/src/core/hle/service/bcat/backend/backend.cpp
+++ b/src/core/hle/service/bcat/backend/backend.cpp
@@ -10,8 +10,8 @@
10 10
11namespace Service::BCAT { 11namespace Service::BCAT {
12 12
13ProgressServiceBackend::ProgressServiceBackend(std::string_view event_name) { 13ProgressServiceBackend::ProgressServiceBackend(Kernel::KernelCore& kernel,
14 auto& kernel{Core::System::GetInstance().Kernel()}; 14 std::string_view event_name) {
15 event = Kernel::WritableEvent::CreateEventPair( 15 event = Kernel::WritableEvent::CreateEventPair(
16 kernel, Kernel::ResetType::Automatic, 16 kernel, Kernel::ResetType::Automatic,
17 std::string("ProgressServiceBackend:UpdateEvent:").append(event_name)); 17 std::string("ProgressServiceBackend:UpdateEvent:").append(event_name));
diff --git a/src/core/hle/service/bcat/backend/backend.h b/src/core/hle/service/bcat/backend/backend.h
index 51dbd3316..ea4b16ad0 100644
--- a/src/core/hle/service/bcat/backend/backend.h
+++ b/src/core/hle/service/bcat/backend/backend.h
@@ -15,6 +15,14 @@
15#include "core/hle/kernel/writable_event.h" 15#include "core/hle/kernel/writable_event.h"
16#include "core/hle/result.h" 16#include "core/hle/result.h"
17 17
18namespace Core {
19class System;
20}
21
22namespace Kernel {
23class KernelCore;
24}
25
18namespace Service::BCAT { 26namespace Service::BCAT {
19 27
20struct DeliveryCacheProgressImpl; 28struct DeliveryCacheProgressImpl;
@@ -88,7 +96,7 @@ public:
88 void FinishDownload(ResultCode result); 96 void FinishDownload(ResultCode result);
89 97
90private: 98private:
91 explicit ProgressServiceBackend(std::string_view event_name); 99 explicit ProgressServiceBackend(Kernel::KernelCore& kernel, std::string_view event_name);
92 100
93 Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent() const; 101 Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent() const;
94 DeliveryCacheProgressImpl& GetImpl(); 102 DeliveryCacheProgressImpl& GetImpl();
@@ -145,6 +153,6 @@ public:
145 std::optional<std::vector<u8>> GetLaunchParameter(TitleIDVersion title) override; 153 std::optional<std::vector<u8>> GetLaunchParameter(TitleIDVersion title) override;
146}; 154};
147 155
148std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter); 156std::unique_ptr<Backend> CreateBackendFromSettings(Core::System& system, DirectoryGetter getter);
149 157
150} // namespace Service::BCAT 158} // namespace Service::BCAT
diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp
index 64022982b..918159e11 100644
--- a/src/core/hle/service/bcat/backend/boxcat.cpp
+++ b/src/core/hle/service/bcat/backend/boxcat.cpp
@@ -104,14 +104,15 @@ std::string GetZIPFilePath(u64 title_id) {
104 104
105// If the error is something the user should know about (build ID mismatch, bad client version), 105// If the error is something the user should know about (build ID mismatch, bad client version),
106// display an error. 106// display an error.
107void HandleDownloadDisplayResult(DownloadResult res) { 107void HandleDownloadDisplayResult(const AM::Applets::AppletManager& applet_manager,
108 DownloadResult res) {
108 if (res == DownloadResult::Success || res == DownloadResult::NoResponse || 109 if (res == DownloadResult::Success || res == DownloadResult::NoResponse ||
109 res == DownloadResult::GeneralWebError || res == DownloadResult::GeneralFSError || 110 res == DownloadResult::GeneralWebError || res == DownloadResult::GeneralFSError ||
110 res == DownloadResult::NoMatchTitleId || res == DownloadResult::InvalidContentType) { 111 res == DownloadResult::NoMatchTitleId || res == DownloadResult::InvalidContentType) {
111 return; 112 return;
112 } 113 }
113 114
114 const auto& frontend{Core::System::GetInstance().GetAppletManager().GetAppletFrontendSet()}; 115 const auto& frontend{applet_manager.GetAppletFrontendSet()};
115 frontend.error->ShowCustomErrorText( 116 frontend.error->ShowCustomErrorText(
116 ResultCode(-1), "There was an error while attempting to use Boxcat.", 117 ResultCode(-1), "There was an error while attempting to use Boxcat.",
117 DOWNLOAD_RESULT_LOG_MESSAGES[static_cast<std::size_t>(res)], [] {}); 118 DOWNLOAD_RESULT_LOG_MESSAGES[static_cast<std::size_t>(res)], [] {});
@@ -264,12 +265,13 @@ private:
264 u64 build_id; 265 u64 build_id;
265}; 266};
266 267
267Boxcat::Boxcat(DirectoryGetter getter) : Backend(std::move(getter)) {} 268Boxcat::Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter)
269 : Backend(std::move(getter)), applet_manager{applet_manager_} {}
268 270
269Boxcat::~Boxcat() = default; 271Boxcat::~Boxcat() = default;
270 272
271void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, 273void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, DirectoryGetter dir_getter,
272 ProgressServiceBackend& progress, 274 TitleIDVersion title, ProgressServiceBackend& progress,
273 std::optional<std::string> dir_name = {}) { 275 std::optional<std::string> dir_name = {}) {
274 progress.SetNeedHLELock(true); 276 progress.SetNeedHLELock(true);
275 277
@@ -295,7 +297,7 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title,
295 FileUtil::Delete(zip_path); 297 FileUtil::Delete(zip_path);
296 } 298 }
297 299
298 HandleDownloadDisplayResult(res); 300 HandleDownloadDisplayResult(applet_manager, res);
299 progress.FinishDownload(ERROR_GENERAL_BCAT_FAILURE); 301 progress.FinishDownload(ERROR_GENERAL_BCAT_FAILURE);
300 return; 302 return;
301 } 303 }
@@ -364,17 +366,24 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title,
364 366
365bool Boxcat::Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) { 367bool Boxcat::Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) {
366 is_syncing.exchange(true); 368 is_syncing.exchange(true);
367 std::thread([this, title, &progress] { SynchronizeInternal(dir_getter, title, progress); }) 369
370 std::thread([this, title, &progress] {
371 SynchronizeInternal(applet_manager, dir_getter, title, progress);
372 })
368 .detach(); 373 .detach();
374
369 return true; 375 return true;
370} 376}
371 377
372bool Boxcat::SynchronizeDirectory(TitleIDVersion title, std::string name, 378bool Boxcat::SynchronizeDirectory(TitleIDVersion title, std::string name,
373 ProgressServiceBackend& progress) { 379 ProgressServiceBackend& progress) {
374 is_syncing.exchange(true); 380 is_syncing.exchange(true);
375 std::thread( 381
376 [this, title, name, &progress] { SynchronizeInternal(dir_getter, title, progress, name); }) 382 std::thread([this, title, name, &progress] {
383 SynchronizeInternal(applet_manager, dir_getter, title, progress, name);
384 })
377 .detach(); 385 .detach();
386
378 return true; 387 return true;
379} 388}
380 389
@@ -420,7 +429,7 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title)
420 FileUtil::Delete(path); 429 FileUtil::Delete(path);
421 } 430 }
422 431
423 HandleDownloadDisplayResult(res); 432 HandleDownloadDisplayResult(applet_manager, res);
424 return std::nullopt; 433 return std::nullopt;
425 } 434 }
426 } 435 }
diff --git a/src/core/hle/service/bcat/backend/boxcat.h b/src/core/hle/service/bcat/backend/boxcat.h
index 601151189..d65b42e58 100644
--- a/src/core/hle/service/bcat/backend/boxcat.h
+++ b/src/core/hle/service/bcat/backend/boxcat.h
@@ -9,6 +9,10 @@
9#include <optional> 9#include <optional>
10#include "core/hle/service/bcat/backend/backend.h" 10#include "core/hle/service/bcat/backend/backend.h"
11 11
12namespace Service::AM::Applets {
13class AppletManager;
14}
15
12namespace Service::BCAT { 16namespace Service::BCAT {
13 17
14struct EventStatus { 18struct EventStatus {
@@ -20,12 +24,13 @@ struct EventStatus {
20/// Boxcat is yuzu's custom backend implementation of Nintendo's BCAT service. It is free to use and 24/// Boxcat is yuzu's custom backend implementation of Nintendo's BCAT service. It is free to use and
21/// doesn't require a switch or nintendo account. The content is controlled by the yuzu team. 25/// doesn't require a switch or nintendo account. The content is controlled by the yuzu team.
22class Boxcat final : public Backend { 26class Boxcat final : public Backend {
23 friend void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, 27 friend void SynchronizeInternal(AM::Applets::AppletManager& applet_manager,
28 DirectoryGetter dir_getter, TitleIDVersion title,
24 ProgressServiceBackend& progress, 29 ProgressServiceBackend& progress,
25 std::optional<std::string> dir_name); 30 std::optional<std::string> dir_name);
26 31
27public: 32public:
28 explicit Boxcat(DirectoryGetter getter); 33 explicit Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter);
29 ~Boxcat() override; 34 ~Boxcat() override;
30 35
31 bool Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) override; 36 bool Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) override;
@@ -53,6 +58,7 @@ private:
53 58
54 class Client; 59 class Client;
55 std::unique_ptr<Client> client; 60 std::unique_ptr<Client> client;
61 AM::Applets::AppletManager& applet_manager;
56}; 62};
57 63
58} // namespace Service::BCAT 64} // namespace Service::BCAT
diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp
index 4e4aa758b..6d9d1527d 100644
--- a/src/core/hle/service/bcat/module.cpp
+++ b/src/core/hle/service/bcat/module.cpp
@@ -125,7 +125,11 @@ private:
125class IBcatService final : public ServiceFramework<IBcatService> { 125class IBcatService final : public ServiceFramework<IBcatService> {
126public: 126public:
127 explicit IBcatService(Core::System& system_, Backend& backend_) 127 explicit IBcatService(Core::System& system_, Backend& backend_)
128 : ServiceFramework("IBcatService"), system{system_}, backend{backend_} { 128 : ServiceFramework("IBcatService"), system{system_}, backend{backend_},
129 progress{{
130 ProgressServiceBackend{system_.Kernel(), "Normal"},
131 ProgressServiceBackend{system_.Kernel(), "Directory"},
132 }} {
129 // clang-format off 133 // clang-format off
130 static const FunctionInfo functions[] = { 134 static const FunctionInfo functions[] = {
131 {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"}, 135 {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"},
@@ -249,10 +253,7 @@ private:
249 Core::System& system; 253 Core::System& system;
250 Backend& backend; 254 Backend& backend;
251 255
252 std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress{ 256 std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress;
253 ProgressServiceBackend{"Normal"},
254 ProgressServiceBackend{"Directory"},
255 };
256}; 257};
257 258
258void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) { 259void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) {
@@ -557,12 +558,12 @@ void Module::Interface::CreateDeliveryCacheStorageServiceWithApplicationId(
557 rb.PushIpcInterface<IDeliveryCacheStorageService>(fsc.GetBCATDirectory(title_id)); 558 rb.PushIpcInterface<IDeliveryCacheStorageService>(fsc.GetBCATDirectory(title_id));
558} 559}
559 560
560std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) { 561std::unique_ptr<Backend> CreateBackendFromSettings([[maybe_unused]] Core::System& system,
561 const auto backend = Settings::values.bcat_backend; 562 DirectoryGetter getter) {
562
563#ifdef YUZU_ENABLE_BOXCAT 563#ifdef YUZU_ENABLE_BOXCAT
564 if (backend == "boxcat") 564 if (Settings::values.bcat_backend == "boxcat") {
565 return std::make_unique<Boxcat>(std::move(getter)); 565 return std::make_unique<Boxcat>(system.GetAppletManager(), std::move(getter));
566 }
566#endif 567#endif
567 568
568 return std::make_unique<NullBackend>(std::move(getter)); 569 return std::make_unique<NullBackend>(std::move(getter));
@@ -571,7 +572,8 @@ std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) {
571Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_, 572Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_,
572 FileSystem::FileSystemController& fsc_, const char* name) 573 FileSystem::FileSystemController& fsc_, const char* name)
573 : ServiceFramework(name), fsc{fsc_}, module{std::move(module_)}, 574 : ServiceFramework(name), fsc{fsc_}, module{std::move(module_)},
574 backend{CreateBackendFromSettings([&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })}, 575 backend{CreateBackendFromSettings(system_,
576 [&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })},
575 system{system_} {} 577 system{system_} {}
576 578
577Module::Interface::~Interface() = default; 579Module::Interface::~Interface() = default;
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp
index a2b25a796..81bd2f3cb 100644
--- a/src/core/hle/service/hid/controllers/npad.cpp
+++ b/src/core/hle/service/hid/controllers/npad.cpp
@@ -583,36 +583,6 @@ bool Controller_NPad::SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2) {
583 return true; 583 return true;
584} 584}
585 585
586bool Controller_NPad::IsControllerSupported(NPadControllerType controller) {
587 if (controller == NPadControllerType::Handheld) {
588 // Handheld is not even a supported type, lets stop here
589 if (std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(),
590 NPAD_HANDHELD) == supported_npad_id_types.end()) {
591 return false;
592 }
593 // Handheld should not be supported in docked mode
594 if (Settings::values.use_docked_mode) {
595 return false;
596 }
597 }
598 switch (controller) {
599 case NPadControllerType::ProController:
600 return style.pro_controller;
601 case NPadControllerType::Handheld:
602 return style.handheld;
603 case NPadControllerType::JoyDual:
604 return style.joycon_dual;
605 case NPadControllerType::JoyLeft:
606 return style.joycon_left;
607 case NPadControllerType::JoyRight:
608 return style.joycon_right;
609 case NPadControllerType::Pokeball:
610 return style.pokeball;
611 default:
612 return false;
613 }
614}
615
616Controller_NPad::LedPattern Controller_NPad::GetLedPattern(u32 npad_id) { 586Controller_NPad::LedPattern Controller_NPad::GetLedPattern(u32 npad_id) {
617 if (npad_id == npad_id_list.back() || npad_id == npad_id_list[npad_id_list.size() - 2]) { 587 if (npad_id == npad_id_list.back() || npad_id == npad_id_list[npad_id_list.size() - 2]) {
618 // These are controllers without led patterns 588 // These are controllers without led patterns
@@ -659,25 +629,24 @@ void Controller_NPad::ClearAllConnectedControllers() {
659} 629}
660 630
661void Controller_NPad::DisconnectAllConnectedControllers() { 631void Controller_NPad::DisconnectAllConnectedControllers() {
662 std::for_each(connected_controllers.begin(), connected_controllers.end(), 632 for (ControllerHolder& controller : connected_controllers) {
663 [](ControllerHolder& controller) { controller.is_connected = false; }); 633 controller.is_connected = false;
634 }
664} 635}
665 636
666void Controller_NPad::ConnectAllDisconnectedControllers() { 637void Controller_NPad::ConnectAllDisconnectedControllers() {
667 std::for_each(connected_controllers.begin(), connected_controllers.end(), 638 for (ControllerHolder& controller : connected_controllers) {
668 [](ControllerHolder& controller) { 639 if (controller.type != NPadControllerType::None && !controller.is_connected) {
669 if (controller.type != NPadControllerType::None && !controller.is_connected) { 640 controller.is_connected = true;
670 controller.is_connected = false; 641 }
671 } 642 }
672 });
673} 643}
674 644
675void Controller_NPad::ClearAllControllers() { 645void Controller_NPad::ClearAllControllers() {
676 std::for_each(connected_controllers.begin(), connected_controllers.end(), 646 for (ControllerHolder& controller : connected_controllers) {
677 [](ControllerHolder& controller) { 647 controller.type = NPadControllerType::None;
678 controller.type = NPadControllerType::None; 648 controller.is_connected = false;
679 controller.is_connected = false; 649 }
680 });
681} 650}
682 651
683u32 Controller_NPad::GetAndResetPressState() { 652u32 Controller_NPad::GetAndResetPressState() {
@@ -685,10 +654,10 @@ u32 Controller_NPad::GetAndResetPressState() {
685} 654}
686 655
687bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const { 656bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const {
688 const bool support_handheld =
689 std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), NPAD_HANDHELD) !=
690 supported_npad_id_types.end();
691 if (controller == NPadControllerType::Handheld) { 657 if (controller == NPadControllerType::Handheld) {
658 const bool support_handheld =
659 std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(),
660 NPAD_HANDHELD) != supported_npad_id_types.end();
692 // Handheld is not even a supported type, lets stop here 661 // Handheld is not even a supported type, lets stop here
693 if (!support_handheld) { 662 if (!support_handheld) {
694 return false; 663 return false;
@@ -700,6 +669,7 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const
700 669
701 return true; 670 return true;
702 } 671 }
672
703 if (std::any_of(supported_npad_id_types.begin(), supported_npad_id_types.end(), 673 if (std::any_of(supported_npad_id_types.begin(), supported_npad_id_types.end(),
704 [](u32 npad_id) { return npad_id <= MAX_NPAD_ID; })) { 674 [](u32 npad_id) { return npad_id <= MAX_NPAD_ID; })) {
705 switch (controller) { 675 switch (controller) {
@@ -717,6 +687,7 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const
717 return false; 687 return false;
718 } 688 }
719 } 689 }
690
720 return false; 691 return false;
721} 692}
722 693
@@ -795,6 +766,7 @@ Controller_NPad::NPadControllerType Controller_NPad::DecideBestController(
795 priority_list.push_back(NPadControllerType::JoyLeft); 766 priority_list.push_back(NPadControllerType::JoyLeft);
796 priority_list.push_back(NPadControllerType::JoyRight); 767 priority_list.push_back(NPadControllerType::JoyRight);
797 priority_list.push_back(NPadControllerType::JoyDual); 768 priority_list.push_back(NPadControllerType::JoyDual);
769 break;
798 } 770 }
799 771
800 const auto iter = std::find_if(priority_list.begin(), priority_list.end(), 772 const auto iter = std::find_if(priority_list.begin(), priority_list.end(),
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h
index 1bc3d55d6..16c4caa1f 100644
--- a/src/core/hle/service/hid/controllers/npad.h
+++ b/src/core/hle/service/hid/controllers/npad.h
@@ -301,6 +301,11 @@ private:
301 bool is_connected; 301 bool is_connected;
302 }; 302 };
303 303
304 void InitNewlyAddedControler(std::size_t controller_idx);
305 bool IsControllerSupported(NPadControllerType controller) const;
306 NPadControllerType DecideBestController(NPadControllerType priority) const;
307 void RequestPadStateUpdate(u32 npad_id);
308
304 u32 press_state{}; 309 u32 press_state{};
305 310
306 NPadType style{}; 311 NPadType style{};
@@ -321,12 +326,7 @@ private:
321 std::array<ControllerHolder, 10> connected_controllers{}; 326 std::array<ControllerHolder, 10> connected_controllers{};
322 bool can_controllers_vibrate{true}; 327 bool can_controllers_vibrate{true};
323 328
324 void InitNewlyAddedControler(std::size_t controller_idx);
325 bool IsControllerSupported(NPadControllerType controller) const;
326 NPadControllerType DecideBestController(NPadControllerType priority) const;
327 void RequestPadStateUpdate(u32 npad_id);
328 std::array<ControllerPad, 10> npad_pad_states{}; 329 std::array<ControllerPad, 10> npad_pad_states{};
329 bool IsControllerSupported(NPadControllerType controller);
330 bool is_in_lr_assignment_mode{false}; 330 bool is_in_lr_assignment_mode{false};
331 Core::System& system; 331 Core::System& system;
332}; 332};
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index f764388bc..3f7b8e670 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -5,6 +5,7 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/logging/log.h" 6#include "common/logging/log.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/core_timing.h"
8#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" 9#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
9#include "core/hle/service/nvdrv/devices/nvmap.h" 10#include "core/hle/service/nvdrv/devices/nvmap.h"
10#include "core/perf_stats.h" 11#include "core/perf_stats.h"
@@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
38 transform, crop_rect}; 39 transform, crop_rect};
39 40
40 system.GetPerfStats().EndGameFrame(); 41 system.GetPerfStats().EndGameFrame();
42 system.GetPerfStats().EndSystemFrame();
41 system.GPU().SwapBuffers(&framebuffer); 43 system.GPU().SwapBuffers(&framebuffer);
44 system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
45 system.GetPerfStats().BeginSystemFrame();
42} 46}
43 47
44} // namespace Service::Nvidia::Devices 48} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index eb88fee1b..b27ee0502 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -63,16 +63,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
63 return NvResult::BadParameter; 63 return NvResult::BadParameter;
64 } 64 }
65 65
66 u32 event_id = params.value & 0x00FF;
67
68 if (event_id >= MaxNvEvents) {
69 std::memcpy(output.data(), &params, sizeof(params));
70 return NvResult::BadParameter;
71 }
72
73 auto event = events_interface.events[event_id];
66 auto& gpu = system.GPU(); 74 auto& gpu = system.GPU();
67 // This is mostly to take into account unimplemented features. As synced 75 // This is mostly to take into account unimplemented features. As synced
68 // gpu is always synced. 76 // gpu is always synced.
69 if (!gpu.IsAsync()) { 77 if (!gpu.IsAsync()) {
78 event.writable->Signal();
70 return NvResult::Success; 79 return NvResult::Success;
71 } 80 }
72 auto lock = gpu.LockSync(); 81 auto lock = gpu.LockSync();
73 const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); 82 const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
74 const s32 diff = current_syncpoint_value - params.threshold; 83 const s32 diff = current_syncpoint_value - params.threshold;
75 if (diff >= 0) { 84 if (diff >= 0) {
85 event.writable->Signal();
76 params.value = current_syncpoint_value; 86 params.value = current_syncpoint_value;
77 std::memcpy(output.data(), &params, sizeof(params)); 87 std::memcpy(output.data(), &params, sizeof(params));
78 return NvResult::Success; 88 return NvResult::Success;
@@ -88,27 +98,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
88 return NvResult::Timeout; 98 return NvResult::Timeout;
89 } 99 }
90 100
91 u32 event_id;
92 if (is_async) {
93 event_id = params.value & 0x00FF;
94 if (event_id >= MaxNvEvents) {
95 std::memcpy(output.data(), &params, sizeof(params));
96 return NvResult::BadParameter;
97 }
98 } else {
99 if (ctrl.fresh_call) {
100 const auto result = events_interface.GetFreeEvent();
101 if (result) {
102 event_id = *result;
103 } else {
104 LOG_CRITICAL(Service_NVDRV, "No Free Events available!");
105 event_id = params.value & 0x00FF;
106 }
107 } else {
108 event_id = ctrl.event_id;
109 }
110 }
111
112 EventState status = events_interface.status[event_id]; 101 EventState status = events_interface.status[event_id];
113 if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { 102 if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) {
114 events_interface.SetEventStatus(event_id, EventState::Waiting); 103 events_interface.SetEventStatus(event_id, EventState::Waiting);
@@ -120,7 +109,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
120 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; 109 params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
121 } 110 }
122 params.value |= event_id; 111 params.value |= event_id;
123 events_interface.events[event_id].writable->Clear(); 112 event.writable->Clear();
124 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); 113 gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
125 if (!is_async && ctrl.fresh_call) { 114 if (!is_async && ctrl.fresh_call) {
126 ctrl.must_delay = true; 115 ctrl.must_delay = true;
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index 5e0c23602..68d139cfb 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
134 IPC::ResponseBuilder rb{ctx, 3, 1}; 134 IPC::ResponseBuilder rb{ctx, 3, 1};
135 rb.Push(RESULT_SUCCESS); 135 rb.Push(RESULT_SUCCESS);
136 if (event_id < MaxNvEvents) { 136 if (event_id < MaxNvEvents) {
137 rb.PushCopyObjects(nvdrv->GetEvent(event_id)); 137 auto event = nvdrv->GetEvent(event_id);
138 event->Clear();
139 rb.PushCopyObjects(event);
138 rb.Push<u32>(NvResult::Success); 140 rb.Push<u32>(NvResult::Success);
139 } else { 141 } else {
140 rb.Push<u32>(0); 142 rb.Push<u32>(0);
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 307a7e928..7bfb99e34 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -40,8 +40,8 @@ Module::Module(Core::System& system) {
40 auto& kernel = system.Kernel(); 40 auto& kernel = system.Kernel();
41 for (u32 i = 0; i < MaxNvEvents; i++) { 41 for (u32 i = 0; i < MaxNvEvents; i++) {
42 std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); 42 std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
43 events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( 43 events_interface.events[i] =
44 kernel, Kernel::ResetType::Automatic, event_label); 44 Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label);
45 events_interface.status[i] = EventState::Free; 45 events_interface.status[i] = EventState::Free;
46 events_interface.registered[i] = false; 46 events_interface.registered[i] = false;
47 } 47 }
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index e1a07d3ee..55b68eb0c 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -14,8 +14,8 @@
14 14
15namespace Service::NVFlinger { 15namespace Service::NVFlinger {
16 16
17BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) { 17BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id)
18 auto& kernel = Core::System::GetInstance().Kernel(); 18 : id(id), layer_id(layer_id) {
19 buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, 19 buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
20 "BufferQueue NativeHandle"); 20 "BufferQueue NativeHandle");
21} 21}
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index 356bedb81..8f9b18547 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -15,6 +15,10 @@
15#include "core/hle/kernel/writable_event.h" 15#include "core/hle/kernel/writable_event.h"
16#include "core/hle/service/nvdrv/nvdata.h" 16#include "core/hle/service/nvdrv/nvdata.h"
17 17
18namespace Kernel {
19class KernelCore;
20}
21
18namespace Service::NVFlinger { 22namespace Service::NVFlinger {
19 23
20struct IGBPBuffer { 24struct IGBPBuffer {
@@ -44,7 +48,7 @@ public:
44 NativeWindowFormat = 2, 48 NativeWindowFormat = 2,
45 }; 49 };
46 50
47 BufferQueue(u32 id, u64 layer_id); 51 explicit BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id);
48 ~BufferQueue(); 52 ~BufferQueue();
49 53
50 enum class BufferTransformFlags : u32 { 54 enum class BufferTransformFlags : u32 {
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 2e4d707b9..cc9522aad 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -83,7 +83,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
83 83
84 const u64 layer_id = next_layer_id++; 84 const u64 layer_id = next_layer_id++;
85 const u32 buffer_queue_id = next_buffer_queue_id++; 85 const u32 buffer_queue_id = next_buffer_queue_id++;
86 buffer_queues.emplace_back(buffer_queue_id, layer_id); 86 buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id);
87 display->CreateLayer(layer_id, buffer_queues.back()); 87 display->CreateLayer(layer_id, buffer_queues.back());
88 return layer_id; 88 return layer_id;
89} 89}
@@ -187,14 +187,18 @@ void NVFlinger::Compose() {
187 MicroProfileFlip(); 187 MicroProfileFlip();
188 188
189 if (!buffer) { 189 if (!buffer) {
190 // There was no queued buffer to draw, render previous frame
191 system.GetPerfStats().EndGameFrame();
192 system.GPU().SwapBuffers({});
193 continue; 190 continue;
194 } 191 }
195 192
196 const auto& igbp_buffer = buffer->get().igbp_buffer; 193 const auto& igbp_buffer = buffer->get().igbp_buffer;
197 194
195 const auto& gpu = system.GPU();
196 const auto& multi_fence = buffer->get().multi_fence;
197 for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
198 const auto& fence = multi_fence.fences[fence_id];
199 gpu.WaitFence(fence.id, fence.value);
200 }
201
198 // Now send the buffer to the GPU for drawing. 202 // Now send the buffer to the GPU for drawing.
199 // TODO(Subv): Support more than just disp0. The display device selection is probably based 203 // TODO(Subv): Support more than just disp0. The display device selection is probably based
200 // on which display we're drawing (Default, Internal, External, etc) 204 // on which display we're drawing (Default, Internal, External, etc)
diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp
index b56cb0627..10821d452 100644
--- a/src/core/memory/cheat_engine.cpp
+++ b/src/core/memory/cheat_engine.cpp
@@ -22,7 +22,7 @@ constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF;
22 22
23StandardVmCallbacks::StandardVmCallbacks(const Core::System& system, 23StandardVmCallbacks::StandardVmCallbacks(const Core::System& system,
24 const CheatProcessMetadata& metadata) 24 const CheatProcessMetadata& metadata)
25 : system(system), metadata(metadata) {} 25 : metadata(metadata), system(system) {}
26 26
27StandardVmCallbacks::~StandardVmCallbacks() = default; 27StandardVmCallbacks::~StandardVmCallbacks() = default;
28 28
@@ -176,9 +176,8 @@ std::vector<CheatEntry> TextCheatParser::Parse(const Core::System& system,
176 176
177CheatEngine::CheatEngine(Core::System& system, std::vector<CheatEntry> cheats, 177CheatEngine::CheatEngine(Core::System& system, std::vector<CheatEntry> cheats,
178 const std::array<u8, 0x20>& build_id) 178 const std::array<u8, 0x20>& build_id)
179 : system{system}, core_timing{system.CoreTiming()}, vm{std::make_unique<StandardVmCallbacks>( 179 : vm{std::make_unique<StandardVmCallbacks>(system, metadata)},
180 system, metadata)}, 180 cheats(std::move(cheats)), core_timing{system.CoreTiming()}, system{system} {
181 cheats(std::move(cheats)) {
182 metadata.main_nso_build_id = build_id; 181 metadata.main_nso_build_id = build_id;
183} 182}
184 183
diff --git a/src/core/memory/dmnt_cheat_vm.cpp b/src/core/memory/dmnt_cheat_vm.cpp
index cc16d15a4..4f4fa5099 100644
--- a/src/core/memory/dmnt_cheat_vm.cpp
+++ b/src/core/memory/dmnt_cheat_vm.cpp
@@ -1133,8 +1133,8 @@ void DmntCheatVm::Execute(const CheatProcessMetadata& metadata) {
1133 case SaveRestoreRegisterOpType::ClearRegs: 1133 case SaveRestoreRegisterOpType::ClearRegs:
1134 case SaveRestoreRegisterOpType::Restore: 1134 case SaveRestoreRegisterOpType::Restore:
1135 default: 1135 default:
1136 src = registers.data(); 1136 src = saved_values.data();
1137 dst = saved_values.data(); 1137 dst = registers.data();
1138 break; 1138 break;
1139 } 1139 }
1140 for (std::size_t i = 0; i < NumRegisters; i++) { 1140 for (std::size_t i = 0; i < NumRegisters; i++) {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index eaa694ff8..cb6eda1b8 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -6,6 +6,7 @@ add_library(video_core STATIC
6 dma_pusher.h 6 dma_pusher.h
7 debug_utils/debug_utils.cpp 7 debug_utils/debug_utils.cpp
8 debug_utils/debug_utils.h 8 debug_utils/debug_utils.h
9 engines/const_buffer_engine_interface.h
9 engines/const_buffer_info.h 10 engines/const_buffer_info.h
10 engines/engine_upload.cpp 11 engines/engine_upload.cpp
11 engines/engine_upload.h 12 engines/engine_upload.h
@@ -107,10 +108,12 @@ add_library(video_core STATIC
107 shader/decode/other.cpp 108 shader/decode/other.cpp
108 shader/ast.cpp 109 shader/ast.cpp
109 shader/ast.h 110 shader/ast.h
110 shader/control_flow.cpp
111 shader/control_flow.h
112 shader/compiler_settings.cpp 111 shader/compiler_settings.cpp
113 shader/compiler_settings.h 112 shader/compiler_settings.h
113 shader/const_buffer_locker.cpp
114 shader/const_buffer_locker.h
115 shader/control_flow.cpp
116 shader/control_flow.h
114 shader/decode.cpp 117 shader/decode.cpp
115 shader/expr.cpp 118 shader/expr.cpp
116 shader/expr.h 119 shader/expr.h
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
new file mode 100644
index 000000000..ac27b6cbe
--- /dev/null
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -0,0 +1,119 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <type_traits>
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/textures/texture.h"
12
13namespace Tegra::Engines {
14
15enum class ShaderType : u32 {
16 Vertex = 0,
17 TesselationControl = 1,
18 TesselationEval = 2,
19 Geometry = 3,
20 Fragment = 4,
21 Compute = 5,
22};
23
24struct SamplerDescriptor {
25 union {
26 BitField<0, 20, Tegra::Shader::TextureType> texture_type;
27 BitField<20, 1, u32> is_array;
28 BitField<21, 1, u32> is_buffer;
29 BitField<22, 1, u32> is_shadow;
30 u32 raw{};
31 };
32
33 bool operator==(const SamplerDescriptor& rhs) const noexcept {
34 return raw == rhs.raw;
35 }
36
37 bool operator!=(const SamplerDescriptor& rhs) const noexcept {
38 return !operator==(rhs);
39 }
40
41 static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
42 SamplerDescriptor result;
43 switch (tic_texture_type) {
44 case Tegra::Texture::TextureType::Texture1D:
45 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
46 result.is_array.Assign(0);
47 result.is_buffer.Assign(0);
48 result.is_shadow.Assign(0);
49 return result;
50 case Tegra::Texture::TextureType::Texture2D:
51 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
52 result.is_array.Assign(0);
53 result.is_buffer.Assign(0);
54 result.is_shadow.Assign(0);
55 return result;
56 case Tegra::Texture::TextureType::Texture3D:
57 result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D);
58 result.is_array.Assign(0);
59 result.is_buffer.Assign(0);
60 result.is_shadow.Assign(0);
61 return result;
62 case Tegra::Texture::TextureType::TextureCubemap:
63 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
64 result.is_array.Assign(0);
65 result.is_buffer.Assign(0);
66 result.is_shadow.Assign(0);
67 return result;
68 case Tegra::Texture::TextureType::Texture1DArray:
69 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
70 result.is_array.Assign(1);
71 result.is_buffer.Assign(0);
72 result.is_shadow.Assign(0);
73 return result;
74 case Tegra::Texture::TextureType::Texture2DArray:
75 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
76 result.is_array.Assign(1);
77 result.is_buffer.Assign(0);
78 result.is_shadow.Assign(0);
79 return result;
80 case Tegra::Texture::TextureType::Texture1DBuffer:
81 result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D);
82 result.is_array.Assign(0);
83 result.is_buffer.Assign(1);
84 result.is_shadow.Assign(0);
85 return result;
86 case Tegra::Texture::TextureType::Texture2DNoMipmap:
87 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
88 result.is_array.Assign(0);
89 result.is_buffer.Assign(0);
90 result.is_shadow.Assign(0);
91 return result;
92 case Tegra::Texture::TextureType::TextureCubeArray:
93 result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube);
94 result.is_array.Assign(1);
95 result.is_buffer.Assign(0);
96 result.is_shadow.Assign(0);
97 return result;
98 default:
99 result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D);
100 result.is_array.Assign(0);
101 result.is_buffer.Assign(0);
102 result.is_shadow.Assign(0);
103 return result;
104 }
105 }
106};
107static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
108
109class ConstBufferEngineInterface {
110public:
111 virtual ~ConstBufferEngineInterface() = default;
112 virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
113 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
114 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
115 u64 offset) const = 0;
116 virtual u32 GetBoundBuffer() const = 0;
117};
118
119} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 63d449135..91adef360 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -70,13 +70,31 @@ Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHan
70 GetTSCEntry(tex_handle.tsc_id)}; 70 GetTSCEntry(tex_handle.tsc_id)};
71} 71}
72 72
73u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { 73u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
74 ASSERT(stage == ShaderType::Compute);
74 const auto& buffer = launch_description.const_buffer_config[const_buffer]; 75 const auto& buffer = launch_description.const_buffer_config[const_buffer];
75 u32 result; 76 u32 result;
76 std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); 77 std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
77 return result; 78 return result;
78} 79}
79 80
81SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
82 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
83}
84
85SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
86 u64 offset) const {
87 ASSERT(stage == ShaderType::Compute);
88 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
89 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
90
91 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
92 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset);
93 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
94 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
95 return result;
96}
97
80void KeplerCompute::ProcessLaunch() { 98void KeplerCompute::ProcessLaunch() {
81 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 99 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
82 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 100 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 90cf650d2..8e7182727 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,6 +10,7 @@
10#include "common/bit_field.h" 10#include "common/bit_field.h"
11#include "common/common_funcs.h" 11#include "common/common_funcs.h"
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "video_core/engines/const_buffer_engine_interface.h"
13#include "video_core/engines/engine_upload.h" 14#include "video_core/engines/engine_upload.h"
14#include "video_core/gpu.h" 15#include "video_core/gpu.h"
15#include "video_core/textures/texture.h" 16#include "video_core/textures/texture.h"
@@ -37,7 +38,7 @@ namespace Tegra::Engines {
37#define KEPLER_COMPUTE_REG_INDEX(field_name) \ 38#define KEPLER_COMPUTE_REG_INDEX(field_name) \
38 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) 39 (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
39 40
40class KeplerCompute final { 41class KeplerCompute final : public ConstBufferEngineInterface {
41public: 42public:
42 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 43 explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
43 MemoryManager& memory_manager); 44 MemoryManager& memory_manager);
@@ -201,7 +202,16 @@ public:
201 Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, 202 Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
202 std::size_t offset) const; 203 std::size_t offset) const;
203 204
204 u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; 205 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
206
207 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
208
209 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
210 u64 offset) const override;
211
212 u32 GetBoundBuffer() const override {
213 return regs.tex_cb_index;
214 }
205 215
206private: 216private:
207 Core::System& system; 217 Core::System& system;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7802fd808..514ed93fa 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -98,10 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
98 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; 98 mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
99} 99}
100 100
101#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) 101#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
102 102
103void Maxwell3D::InitDirtySettings() { 103void Maxwell3D::InitDirtySettings() {
104 const auto set_block = [this](const u32 start, const u32 range, const u8 position) { 104 const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
105 const auto start_itr = dirty_pointers.begin() + start; 105 const auto start_itr = dirty_pointers.begin() + start;
106 const auto end_itr = start_itr + range; 106 const auto end_itr = start_itr + range;
107 std::fill(start_itr, end_itr, position); 107 std::fill(start_itr, end_itr, position);
@@ -112,10 +112,10 @@ void Maxwell3D::InitDirtySettings() {
112 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); 112 constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
113 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); 113 constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
114 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; 114 constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
115 u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); 115 u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
116 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { 116 for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
117 set_block(rt_reg, registers_per_rt, rt_dirty_reg); 117 set_block(rt_reg, registers_per_rt, rt_dirty_reg);
118 rt_dirty_reg++; 118 ++rt_dirty_reg;
119 } 119 }
120 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); 120 constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
121 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; 121 dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
@@ -129,35 +129,35 @@ void Maxwell3D::InitDirtySettings() {
129 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); 129 constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
130 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); 130 constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
131 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; 131 constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
132 u32 va_reg = DIRTY_REGS_POS(vertex_array); 132 u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
133 u32 vi_reg = DIRTY_REGS_POS(vertex_instance); 133 u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
134 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; 134 for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
135 vertex_reg += vertex_array_size) { 135 vertex_reg += vertex_array_size) {
136 set_block(vertex_reg, 3, va_reg); 136 set_block(vertex_reg, 3, va_dirty_reg);
137 // The divisor concerns vertex array instances 137 // The divisor concerns vertex array instances
138 dirty_pointers[vertex_reg + 3] = vi_reg; 138 dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
139 va_reg++; 139 ++va_dirty_reg;
140 vi_reg++; 140 ++vi_dirty_reg;
141 } 141 }
142 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); 142 constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
143 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); 143 constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
144 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; 144 constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
145 va_reg = DIRTY_REGS_POS(vertex_array); 145 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
146 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; 146 for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
147 vertex_reg += vertex_limit_size) { 147 vertex_reg += vertex_limit_size) {
148 set_block(vertex_reg, vertex_limit_size, va_reg); 148 set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
149 va_reg++; 149 va_dirty_reg++;
150 } 150 }
151 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); 151 constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
152 constexpr u32 vertex_instance_size = 152 constexpr u32 vertex_instance_size =
153 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); 153 sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
154 constexpr u32 vertex_instance_end = 154 constexpr u32 vertex_instance_end =
155 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; 155 vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
156 vi_reg = DIRTY_REGS_POS(vertex_instance); 156 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
157 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; 157 for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
158 vertex_reg += vertex_instance_size) { 158 vertex_reg += vertex_instance_size) {
159 set_block(vertex_reg, vertex_instance_size, vi_reg); 159 set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
160 vi_reg++; 160 vi_dirty_reg++;
161 } 161 }
162 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), 162 set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
163 DIRTY_REGS_POS(vertex_attrib_format)); 163 DIRTY_REGS_POS(vertex_attrib_format));
@@ -171,7 +171,7 @@ void Maxwell3D::InitDirtySettings() {
171 // State 171 // State
172 172
173 // Viewport 173 // Viewport
174 constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); 174 constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
175 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); 175 constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
176 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); 176 constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
177 set_block(viewport_start, viewport_size, viewport_dirty_reg); 177 set_block(viewport_start, viewport_size, viewport_dirty_reg);
@@ -198,7 +198,7 @@ void Maxwell3D::InitDirtySettings() {
198 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); 198 set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
199 199
200 // Depth Test 200 // Depth Test
201 constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); 201 constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
202 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; 202 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
203 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; 203 dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
204 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; 204 dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
@@ -223,12 +223,12 @@ void Maxwell3D::InitDirtySettings() {
223 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; 223 dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
224 224
225 // Color Mask 225 // Color Mask
226 constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); 226 constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
227 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; 227 dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
228 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), 228 set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
229 color_mask_dirty_reg); 229 color_mask_dirty_reg);
230 // Blend State 230 // Blend State
231 constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); 231 constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
232 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), 232 set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
233 blend_state_dirty_reg); 233 blend_state_dirty_reg);
234 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; 234 dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
@@ -237,12 +237,12 @@ void Maxwell3D::InitDirtySettings() {
237 blend_state_dirty_reg); 237 blend_state_dirty_reg);
238 238
239 // Scissor State 239 // Scissor State
240 constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); 240 constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
241 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), 241 set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
242 scissor_test_dirty_reg); 242 scissor_test_dirty_reg);
243 243
244 // Polygon Offset 244 // Polygon Offset
245 constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); 245 constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
246 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; 246 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
247 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; 247 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
248 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; 248 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
@@ -251,7 +251,7 @@ void Maxwell3D::InitDirtySettings() {
251 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; 251 dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
252 252
253 // Depth bounds 253 // Depth bounds
254 constexpr u32 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); 254 constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
255 dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; 255 dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
256 dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; 256 dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
257} 257}
@@ -478,7 +478,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
478} 478}
479 479
480void Maxwell3D::FlushMMEInlineDraw() { 480void Maxwell3D::FlushMMEInlineDraw() {
481 LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), 481 LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
482 regs.vertex_buffer.count); 482 regs.vertex_buffer.count);
483 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); 483 ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
484 ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); 484 ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
@@ -846,7 +846,8 @@ void Maxwell3D::ProcessClearBuffers() {
846 rasterizer.Clear(); 846 rasterizer.Clear();
847} 847}
848 848
849u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { 849u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
850 ASSERT(stage != ShaderType::Compute);
850 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; 851 const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
851 const auto& buffer = shader_stage.const_buffers[const_buffer]; 852 const auto& buffer = shader_stage.const_buffers[const_buffer];
852 u32 result; 853 u32 result;
@@ -854,4 +855,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6
854 return result; 855 return result;
855} 856}
856 857
858SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
859 return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
860}
861
862SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
863 u64 offset) const {
864 ASSERT(stage != ShaderType::Compute);
865 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
866 const auto& tex_info_buffer = shader.const_buffers[const_buffer];
867 const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
868
869 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
870 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset);
871 SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
872 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
873 return result;
874}
875
857} // namespace Tegra::Engines 876} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index e3f1047d5..987ad77b2 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -15,6 +15,7 @@
15#include "common/common_funcs.h" 15#include "common/common_funcs.h"
16#include "common/common_types.h" 16#include "common/common_types.h"
17#include "common/math_util.h" 17#include "common/math_util.h"
18#include "video_core/engines/const_buffer_engine_interface.h"
18#include "video_core/engines/const_buffer_info.h" 19#include "video_core/engines/const_buffer_info.h"
19#include "video_core/engines/engine_upload.h" 20#include "video_core/engines/engine_upload.h"
20#include "video_core/gpu.h" 21#include "video_core/gpu.h"
@@ -44,7 +45,7 @@ namespace Tegra::Engines {
44#define MAXWELL3D_REG_INDEX(field_name) \ 45#define MAXWELL3D_REG_INDEX(field_name) \
45 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) 46 (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
46 47
47class Maxwell3D final { 48class Maxwell3D final : public ConstBufferEngineInterface {
48public: 49public:
49 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 50 explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
50 MemoryManager& memory_manager); 51 MemoryManager& memory_manager);
@@ -1165,6 +1166,8 @@ public:
1165 1166
1166 struct DirtyRegs { 1167 struct DirtyRegs {
1167 static constexpr std::size_t NUM_REGS = 256; 1168 static constexpr std::size_t NUM_REGS = 256;
1169 static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
1170
1168 union { 1171 union {
1169 struct { 1172 struct {
1170 bool null_dirty; 1173 bool null_dirty;
@@ -1257,7 +1260,16 @@ public:
1257 /// Returns the texture information for a specific texture in a specific shader stage. 1260 /// Returns the texture information for a specific texture in a specific shader stage.
1258 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; 1261 Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
1259 1262
1260 u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; 1263 u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
1264
1265 SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
1266
1267 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
1268 u64 offset) const override;
1269
1270 u32 GetBoundBuffer() const override {
1271 return regs.tex_cb_index;
1272 }
1261 1273
1262 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than 1274 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
1263 /// we've seen used. 1275 /// we've seen used.
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7a6355ce2..d3d05a866 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 {
574}; 574};
575 575
576union Instruction { 576union Instruction {
577 Instruction& operator=(const Instruction& instr) { 577 constexpr Instruction& operator=(const Instruction& instr) {
578 value = instr.value; 578 value = instr.value;
579 return *this; 579 return *this;
580 } 580 }
@@ -1760,22 +1760,22 @@ public:
1760 1760
1761 class Matcher { 1761 class Matcher {
1762 public: 1762 public:
1763 Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type) 1763 constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
1764 : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} 1764 : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
1765 1765
1766 const char* GetName() const { 1766 constexpr const char* GetName() const {
1767 return name; 1767 return name;
1768 } 1768 }
1769 1769
1770 u16 GetMask() const { 1770 constexpr u16 GetMask() const {
1771 return mask; 1771 return mask;
1772 } 1772 }
1773 1773
1774 Id GetId() const { 1774 constexpr Id GetId() const {
1775 return id; 1775 return id;
1776 } 1776 }
1777 1777
1778 Type GetType() const { 1778 constexpr Type GetType() const {
1779 return type; 1779 return type;
1780 } 1780 }
1781 1781
@@ -1784,7 +1784,7 @@ public:
1784 * @param instruction The instruction to test 1784 * @param instruction The instruction to test
1785 * @returns true if the given instruction matches. 1785 * @returns true if the given instruction matches.
1786 */ 1786 */
1787 bool Matches(u16 instruction) const { 1787 constexpr bool Matches(u16 instruction) const {
1788 return (instruction & mask) == expected; 1788 return (instruction & mask) == expected;
1789 } 1789 }
1790 1790
@@ -1818,7 +1818,7 @@ private:
1818 * A '0' in a bitstring indicates that a zero must be present at that bit position. 1818 * A '0' in a bitstring indicates that a zero must be present at that bit position.
1819 * A '1' in a bitstring indicates that a one must be present at that bit position. 1819 * A '1' in a bitstring indicates that a one must be present at that bit position.
1820 */ 1820 */
1821 static auto GetMaskAndExpect(const char* const bitstring) { 1821 static constexpr auto GetMaskAndExpect(const char* const bitstring) {
1822 u16 mask = 0, expect = 0; 1822 u16 mask = 0, expect = 0;
1823 for (std::size_t i = 0; i < opcode_bitsize; i++) { 1823 for (std::size_t i = 0; i < opcode_bitsize; i++) {
1824 const std::size_t bit_position = opcode_bitsize - i - 1; 1824 const std::size_t bit_position = opcode_bitsize - i - 1;
@@ -1835,15 +1835,15 @@ private:
1835 break; 1835 break;
1836 } 1836 }
1837 } 1837 }
1838 return std::make_tuple(mask, expect); 1838 return std::make_pair(mask, expect);
1839 } 1839 }
1840 1840
1841 public: 1841 public:
1842 /// Creates a matcher that can match and parse instructions based on bitstring. 1842 /// Creates a matcher that can match and parse instructions based on bitstring.
1843 static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type, 1843 static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
1844 const char* const name) { 1844 const char* const name) {
1845 const auto mask_expect = GetMaskAndExpect(bitstring); 1845 const auto [mask, expected] = GetMaskAndExpect(bitstring);
1846 return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type); 1846 return Matcher(name, mask, expected, op, type);
1847 } 1847 }
1848 }; 1848 };
1849 1849
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 76cfe8107..095660115 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h"
6#include "core/core.h" 7#include "core/core.h"
7#include "core/core_timing.h" 8#include "core/core_timing.h"
8#include "core/memory.h" 9#include "core/memory.h"
@@ -17,6 +18,8 @@
17 18
18namespace Tegra { 19namespace Tegra {
19 20
21MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
22
20GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) 23GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
21 : system{system}, renderer{renderer}, is_async{is_async} { 24 : system{system}, renderer{renderer}, is_async{is_async} {
22 auto& rasterizer{renderer.Rasterizer()}; 25 auto& rasterizer{renderer.Rasterizer()};
@@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
63 return *dma_pusher; 66 return *dma_pusher;
64} 67}
65 68
69void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
70 // Synced GPU, is always in sync
71 if (!is_async) {
72 return;
73 }
74 MICROPROFILE_SCOPE(GPU_wait);
75 while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
76 }
77}
78
66void GPU::IncrementSyncPoint(const u32 syncpoint_id) { 79void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
67 syncpoints[syncpoint_id]++; 80 syncpoints[syncpoint_id]++;
68 std::lock_guard lock{sync_mutex}; 81 std::lock_guard lock{sync_mutex};
@@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
326 block.sequence = regs.semaphore_sequence; 339 block.sequence = regs.semaphore_sequence;
327 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of 340 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
328 // CoreTiming 341 // CoreTiming
329 block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); 342 block.timestamp = system.CoreTiming().GetTicks();
330 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, 343 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
331 sizeof(block)); 344 sizeof(block));
332 } else { 345 } else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 29fa8e95b..dbca19f35 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -177,6 +177,12 @@ public:
177 /// Returns a reference to the GPU DMA pusher. 177 /// Returns a reference to the GPU DMA pusher.
178 Tegra::DmaPusher& DmaPusher(); 178 Tegra::DmaPusher& DmaPusher();
179 179
180 // Waits for the GPU to finish working
181 virtual void WaitIdle() const = 0;
182
183 /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
184 void WaitFence(u32 syncpoint_id, u32 value) const;
185
180 void IncrementSyncPoint(u32 syncpoint_id); 186 void IncrementSyncPoint(u32 syncpoint_id);
181 187
182 u32 GetSyncpointValue(u32 syncpoint_id) const; 188 u32 GetSyncpointValue(u32 syncpoint_id) const;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index f2a3a390e..04222d060 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con
44 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); 44 interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
45} 45}
46 46
47void GPUAsynch::WaitIdle() const {
48 gpu_thread.WaitIdle();
49}
50
47} // namespace VideoCommon 51} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index a12f9bac4..1241ade1d 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,7 @@ public:
25 void FlushRegion(CacheAddr addr, u64 size) override; 25 void FlushRegion(CacheAddr addr, u64 size) override;
26 void InvalidateRegion(CacheAddr addr, u64 size) override; 26 void InvalidateRegion(CacheAddr addr, u64 size) override;
27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 27 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
28 void WaitIdle() const override;
28 29
29protected: 30protected:
30 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; 31 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 5eb1c461c..c71baee89 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,7 @@ public:
24 void FlushRegion(CacheAddr addr, u64 size) override; 24 void FlushRegion(CacheAddr addr, u64 size) override;
25 void InvalidateRegion(CacheAddr addr, u64 size) override; 25 void InvalidateRegion(CacheAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; 26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27 void WaitIdle() const override {}
27 28
28protected: 29protected:
29 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, 30 void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 5f039e4fd..758a37f14 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,8 +5,6 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/core_timing.h"
9#include "core/core_timing_util.h"
10#include "core/frontend/scope_acquire_window_context.h" 8#include "core/frontend/scope_acquire_window_context.h"
11#include "video_core/dma_pusher.h" 9#include "video_core/dma_pusher.h"
12#include "video_core/gpu.h" 10#include "video_core/gpu.h"
@@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
68 66
69void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { 67void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
70 thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; 68 thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
71 synchronization_event = system.CoreTiming().RegisterEvent(
72 "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
73} 69}
74 70
75void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 71void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
76 const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; 72 PushCommand(SubmitListCommand(std::move(entries)));
77 const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
78 system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
79} 73}
80 74
81void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 75void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
96 InvalidateRegion(addr, size); 90 InvalidateRegion(addr, size);
97} 91}
98 92
93void ThreadManager::WaitIdle() const {
94 while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
95 }
96}
97
99u64 ThreadManager::PushCommand(CommandData&& command_data) { 98u64 ThreadManager::PushCommand(CommandData&& command_data) {
100 const u64 fence{++state.last_fence}; 99 const u64 fence{++state.last_fence};
101 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 100 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
102 return fence; 101 return fence;
103} 102}
104 103
105MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
106void SynchState::WaitForSynchronization(u64 fence) {
107 while (signaled_fence.load() < fence)
108 ;
109}
110
111} // namespace VideoCommon::GPUThread 104} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 3ae0ec9f3..08dc96bb3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -21,9 +21,6 @@ class DmaPusher;
21 21
22namespace Core { 22namespace Core {
23class System; 23class System;
24namespace Timing {
25struct EventType;
26} // namespace Timing
27} // namespace Core 24} // namespace Core
28 25
29namespace VideoCommon::GPUThread { 26namespace VideoCommon::GPUThread {
@@ -89,8 +86,6 @@ struct CommandDataContainer {
89struct SynchState final { 86struct SynchState final {
90 std::atomic_bool is_running{true}; 87 std::atomic_bool is_running{true};
91 88
92 void WaitForSynchronization(u64 fence);
93
94 using CommandQueue = Common::SPSCQueue<CommandDataContainer>; 89 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
95 CommandQueue queue; 90 CommandQueue queue;
96 u64 last_fence{}; 91 u64 last_fence{};
@@ -121,6 +116,9 @@ public:
121 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 116 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
122 void FlushAndInvalidateRegion(CacheAddr addr, u64 size); 117 void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
123 118
119 // Wait until the gpu thread is idle.
120 void WaitIdle() const;
121
124private: 122private:
125 /// Pushes a command to be executed by the GPU thread 123 /// Pushes a command to be executed by the GPU thread
126 u64 PushCommand(CommandData&& command_data); 124 u64 PushCommand(CommandData&& command_data);
@@ -128,7 +126,6 @@ private:
128private: 126private:
129 SynchState state; 127 SynchState state;
130 Core::System& system; 128 Core::System& system;
131 Core::Timing::EventType* synchronization_event{};
132 std::thread thread; 129 std::thread thread;
133 std::thread::id thread_id; 130 std::thread::id thread_id;
134}; 131};
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index dbaeac6db..42031d80a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -11,6 +11,77 @@
11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); 11MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
12 12
13namespace Tegra { 13namespace Tegra {
14namespace {
15enum class Operation : u32 {
16 ALU = 0,
17 AddImmediate = 1,
18 ExtractInsert = 2,
19 ExtractShiftLeftImmediate = 3,
20 ExtractShiftLeftRegister = 4,
21 Read = 5,
22 Unused = 6, // This operation doesn't seem to be a valid encoding.
23 Branch = 7,
24};
25} // Anonymous namespace
26
27enum class MacroInterpreter::ALUOperation : u32 {
28 Add = 0,
29 AddWithCarry = 1,
30 Subtract = 2,
31 SubtractWithBorrow = 3,
32 // Operations 4-7 don't seem to be valid encodings.
33 Xor = 8,
34 Or = 9,
35 And = 10,
36 AndNot = 11,
37 Nand = 12
38};
39
40enum class MacroInterpreter::ResultOperation : u32 {
41 IgnoreAndFetch = 0,
42 Move = 1,
43 MoveAndSetMethod = 2,
44 FetchAndSend = 3,
45 MoveAndSend = 4,
46 FetchAndSetMethod = 5,
47 MoveAndSetMethodFetchAndSend = 6,
48 MoveAndSetMethodSend = 7
49};
50
51enum class MacroInterpreter::BranchCondition : u32 {
52 Zero = 0,
53 NotZero = 1,
54};
55
56union MacroInterpreter::Opcode {
57 u32 raw;
58 BitField<0, 3, Operation> operation;
59 BitField<4, 3, ResultOperation> result_operation;
60 BitField<4, 1, BranchCondition> branch_condition;
61 // If set on a branch, then the branch doesn't have a delay slot.
62 BitField<5, 1, u32> branch_annul;
63 BitField<7, 1, u32> is_exit;
64 BitField<8, 3, u32> dst;
65 BitField<11, 3, u32> src_a;
66 BitField<14, 3, u32> src_b;
67 // The signed immediate overlaps the second source operand and the alu operation.
68 BitField<14, 18, s32> immediate;
69
70 BitField<17, 5, ALUOperation> alu_operation;
71
72 // Bitfield instructions data
73 BitField<17, 5, u32> bf_src_bit;
74 BitField<22, 5, u32> bf_size;
75 BitField<27, 5, u32> bf_dst_bit;
76
77 u32 GetBitfieldMask() const {
78 return (1 << bf_size) - 1;
79 }
80
81 s32 GetBranchTarget() const {
82 return static_cast<s32>(immediate * sizeof(u32));
83 }
84};
14 85
15MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} 86MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
16 87
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 76b6a895b..631146d89 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -6,7 +6,6 @@
6 6
7#include <array> 7#include <array>
8#include <optional> 8#include <optional>
9#include <vector>
10 9
11#include "common/bit_field.h" 10#include "common/bit_field.h"
12#include "common/common_types.h" 11#include "common/common_types.h"
@@ -28,75 +27,11 @@ public:
28 void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); 27 void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
29 28
30private: 29private:
31 enum class Operation : u32 { 30 enum class ALUOperation : u32;
32 ALU = 0, 31 enum class BranchCondition : u32;
33 AddImmediate = 1, 32 enum class ResultOperation : u32;
34 ExtractInsert = 2,
35 ExtractShiftLeftImmediate = 3,
36 ExtractShiftLeftRegister = 4,
37 Read = 5,
38 Unused = 6, // This operation doesn't seem to be a valid encoding.
39 Branch = 7,
40 };
41
42 enum class ALUOperation : u32 {
43 Add = 0,
44 AddWithCarry = 1,
45 Subtract = 2,
46 SubtractWithBorrow = 3,
47 // Operations 4-7 don't seem to be valid encodings.
48 Xor = 8,
49 Or = 9,
50 And = 10,
51 AndNot = 11,
52 Nand = 12
53 };
54
55 enum class ResultOperation : u32 {
56 IgnoreAndFetch = 0,
57 Move = 1,
58 MoveAndSetMethod = 2,
59 FetchAndSend = 3,
60 MoveAndSend = 4,
61 FetchAndSetMethod = 5,
62 MoveAndSetMethodFetchAndSend = 6,
63 MoveAndSetMethodSend = 7
64 };
65 33
66 enum class BranchCondition : u32 { 34 union Opcode;
67 Zero = 0,
68 NotZero = 1,
69 };
70
71 union Opcode {
72 u32 raw;
73 BitField<0, 3, Operation> operation;
74 BitField<4, 3, ResultOperation> result_operation;
75 BitField<4, 1, BranchCondition> branch_condition;
76 BitField<5, 1, u32>
77 branch_annul; // If set on a branch, then the branch doesn't have a delay slot.
78 BitField<7, 1, u32> is_exit;
79 BitField<8, 3, u32> dst;
80 BitField<11, 3, u32> src_a;
81 BitField<14, 3, u32> src_b;
82 // The signed immediate overlaps the second source operand and the alu operation.
83 BitField<14, 18, s32> immediate;
84
85 BitField<17, 5, ALUOperation> alu_operation;
86
87 // Bitfield instructions data
88 BitField<17, 5, u32> bf_src_bit;
89 BitField<22, 5, u32> bf_size;
90 BitField<27, 5, u32> bf_dst_bit;
91
92 u32 GetBitfieldMask() const {
93 return (1 << bf_size) - 1;
94 }
95
96 s32 GetBranchTarget() const {
97 return static_cast<s32>(immediate * sizeof(u32));
98 }
99 };
100 35
101 union MethodAddress { 36 union MethodAddress {
102 u32 raw; 37 u32 raw;
@@ -149,9 +84,10 @@ private:
149 84
150 Engines::Maxwell3D& maxwell3d; 85 Engines::Maxwell3D& maxwell3d;
151 86
152 u32 pc; ///< Current program counter 87 /// Current program counter
153 std::optional<u32> 88 u32 pc;
154 delayed_pc; ///< Program counter to execute at after the delay slot is executed. 89 /// Program counter to execute at after the delay slot is executed.
90 std::optional<u32> delayed_pc;
155 91
156 static constexpr std::size_t NumMacroRegisters = 8; 92 static constexpr std::size_t NumMacroRegisters = 8;
157 93
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index ab71870ab..fe5f08ace 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -93,6 +93,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
93 MortonCopy<true, PixelFormat::DXT23_SRGB>, 93 MortonCopy<true, PixelFormat::DXT23_SRGB>,
94 MortonCopy<true, PixelFormat::DXT45_SRGB>, 94 MortonCopy<true, PixelFormat::DXT45_SRGB>,
95 MortonCopy<true, PixelFormat::BC7U_SRGB>, 95 MortonCopy<true, PixelFormat::BC7U_SRGB>,
96 MortonCopy<true, PixelFormat::R4G4B4A4U>,
96 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, 97 MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
97 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, 98 MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
98 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, 99 MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
@@ -101,6 +102,16 @@ static constexpr ConversionArray morton_to_linear_fns = {
101 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, 102 MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
102 MortonCopy<true, PixelFormat::ASTC_2D_10X8>, 103 MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
103 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, 104 MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
105 MortonCopy<true, PixelFormat::ASTC_2D_6X6>,
106 MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
107 MortonCopy<true, PixelFormat::ASTC_2D_10X10>,
108 MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
109 MortonCopy<true, PixelFormat::ASTC_2D_12X12>,
110 MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
111 MortonCopy<true, PixelFormat::ASTC_2D_8X6>,
112 MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
113 MortonCopy<true, PixelFormat::ASTC_2D_6X5>,
114 MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
104 MortonCopy<true, PixelFormat::Z32F>, 115 MortonCopy<true, PixelFormat::Z32F>,
105 MortonCopy<true, PixelFormat::Z16>, 116 MortonCopy<true, PixelFormat::Z16>,
106 MortonCopy<true, PixelFormat::Z24S8>, 117 MortonCopy<true, PixelFormat::Z24S8>,
@@ -162,6 +173,17 @@ static constexpr ConversionArray linear_to_morton_fns = {
162 MortonCopy<false, PixelFormat::DXT23_SRGB>, 173 MortonCopy<false, PixelFormat::DXT23_SRGB>,
163 MortonCopy<false, PixelFormat::DXT45_SRGB>, 174 MortonCopy<false, PixelFormat::DXT45_SRGB>,
164 MortonCopy<false, PixelFormat::BC7U_SRGB>, 175 MortonCopy<false, PixelFormat::BC7U_SRGB>,
176 MortonCopy<false, PixelFormat::R4G4B4A4U>,
177 nullptr,
178 nullptr,
179 nullptr,
180 nullptr,
181 nullptr,
182 nullptr,
183 nullptr,
184 nullptr,
185 nullptr,
186 nullptr,
165 nullptr, 187 nullptr,
166 nullptr, 188 nullptr,
167 nullptr, 189 nullptr,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a85f730a8..9431d64ac 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
348} 348}
349 349
350void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { 350void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
351 std::lock_guard lock{pages_mutex};
351 const u64 page_start{addr >> Memory::PAGE_BITS}; 352 const u64 page_start{addr >> Memory::PAGE_BITS};
352 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; 353 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
353 354
@@ -974,7 +975,8 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag
974 } 975 }
975 const auto cbuf = entry.GetBindlessCBuf(); 976 const auto cbuf = entry.GetBindlessCBuf();
976 Tegra::Texture::TextureHandle tex_handle; 977 Tegra::Texture::TextureHandle tex_handle;
977 tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); 978 Tegra::Engines::ShaderType shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
979 tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second);
978 return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); 980 return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
979 }(); 981 }();
980 982
@@ -1004,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
1004 } 1006 }
1005 const auto cbuf = entry.GetBindlessCBuf(); 1007 const auto cbuf = entry.GetBindlessCBuf();
1006 Tegra::Texture::TextureHandle tex_handle; 1008 Tegra::Texture::TextureHandle tex_handle;
1007 tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); 1009 tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
1010 cbuf.first, cbuf.second);
1008 return compute.GetTextureInfo(tex_handle, entry.GetOffset()); 1011 return compute.GetTextureInfo(tex_handle, entry.GetOffset());
1009 }(); 1012 }();
1010 1013
@@ -1049,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
1049 } 1052 }
1050 const auto cbuf = entry.GetBindlessCBuf(); 1053 const auto cbuf = entry.GetBindlessCBuf();
1051 Tegra::Texture::TextureHandle tex_handle; 1054 Tegra::Texture::TextureHandle tex_handle;
1052 tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); 1055 tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
1056 cbuf.first, cbuf.second);
1053 return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; 1057 return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
1054 }(); 1058 }();
1055 SetupImage(bindpoint, tic, entry); 1059 SetupImage(bindpoint, tic, entry);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9c10ebda3..c24a02d71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -9,6 +9,7 @@
9#include <cstddef> 9#include <cstddef>
10#include <map> 10#include <map>
11#include <memory> 11#include <memory>
12#include <mutex>
12#include <optional> 13#include <optional>
13#include <tuple> 14#include <tuple>
14#include <utility> 15#include <utility>
@@ -230,6 +231,8 @@ private:
230 231
231 using CachedPageMap = boost::icl::interval_map<u64, int>; 232 using CachedPageMap = boost::icl::interval_map<u64, int>;
232 CachedPageMap cached_pages; 233 CachedPageMap cached_pages;
234
235 std::mutex pages_mutex;
233}; 236};
234 237
235} // namespace OpenGL 238} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 42ca3b1bd..f1b89165d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,13 +3,16 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <mutex> 5#include <mutex>
6#include <optional>
7#include <string>
6#include <thread> 8#include <thread>
9#include <unordered_set>
7#include <boost/functional/hash.hpp> 10#include <boost/functional/hash.hpp>
8#include "common/assert.h" 11#include "common/assert.h"
9#include "common/hash.h"
10#include "common/scope_exit.h" 12#include "common/scope_exit.h"
11#include "core/core.h" 13#include "core/core.h"
12#include "core/frontend/emu_window.h" 14#include "core/frontend/emu_window.h"
15#include "video_core/engines/kepler_compute.h"
13#include "video_core/engines/maxwell_3d.h" 16#include "video_core/engines/maxwell_3d.h"
14#include "video_core/memory_manager.h" 17#include "video_core/memory_manager.h"
15#include "video_core/renderer_opengl/gl_rasterizer.h" 18#include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -21,18 +24,20 @@
21 24
22namespace OpenGL { 25namespace OpenGL {
23 26
27using Tegra::Engines::ShaderType;
28using VideoCommon::Shader::ConstBufferLocker;
24using VideoCommon::Shader::ProgramCode; 29using VideoCommon::Shader::ProgramCode;
30using VideoCommon::Shader::ShaderIR;
31
32namespace {
25 33
26// One UBO is always reserved for emulation values on staged shaders 34// One UBO is always reserved for emulation values on staged shaders
27constexpr u32 STAGE_RESERVED_UBOS = 1; 35constexpr u32 STAGE_RESERVED_UBOS = 1;
28 36
29struct UnspecializedShader { 37constexpr u32 STAGE_MAIN_OFFSET = 10;
30 std::string code; 38constexpr u32 KERNEL_MAIN_OFFSET = 0;
31 GLShader::ShaderEntries entries;
32 ProgramType program_type;
33};
34 39
35namespace { 40constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
36 41
37/// Gets the address for the specified shader stage program 42/// Gets the address for the specified shader stage program
38GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { 43GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
@@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program)
41 return gpu.regs.code_address.CodeAddress() + shader_config.offset; 46 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
42} 47}
43 48
49/// Gets if the current instruction offset is a scheduler instruction
50constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
51 // Sched instructions appear once every 4 instructions.
52 constexpr std::size_t SchedPeriod = 4;
53 const std::size_t absolute_offset = offset - main_offset;
54 return (absolute_offset % SchedPeriod) == 0;
55}
56
57/// Calculates the size of a program stream
58std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
59 constexpr std::size_t start_offset = 10;
60 // This is the encoded version of BRA that jumps to itself. All Nvidia
61 // shaders end with one.
62 constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
63 constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
64 std::size_t offset = start_offset;
65 while (offset < program.size()) {
66 const u64 instruction = program[offset];
67 if (!IsSchedInstruction(offset, start_offset)) {
68 if ((instruction & mask) == self_jumping_branch) {
69 // End on Maxwell's "nop" instruction
70 break;
71 }
72 if (instruction == 0) {
73 break;
74 }
75 }
76 offset++;
77 }
78 // The last instruction is included in the program size
79 return std::min(offset + 1, program.size());
80}
81
44/// Gets the shader program code from memory for the specified address 82/// Gets the shader program code from memory for the specified address
45ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, 83ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
46 const u8* host_ptr) { 84 const u8* host_ptr) {
@@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
51 }); 89 });
52 memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), 90 memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
53 program_code.size() * sizeof(u64)); 91 program_code.size() * sizeof(u64));
92 program_code.resize(CalculateProgramSize(program_code));
54 return program_code; 93 return program_code;
55} 94}
56 95
@@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) {
71 } 110 }
72} 111}
73 112
74/// Gets if the current instruction offset is a scheduler instruction
75constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
76 // Sched instructions appear once every 4 instructions.
77 constexpr std::size_t SchedPeriod = 4;
78 const std::size_t absolute_offset = offset - main_offset;
79 return (absolute_offset % SchedPeriod) == 0;
80}
81
82/// Describes primitive behavior on geometry shaders 113/// Describes primitive behavior on geometry shaders
83constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { 114constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
84 switch (primitive_mode) { 115 switch (primitive_mode) {
@@ -121,110 +152,142 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) {
121 return {}; 152 return {};
122} 153}
123 154
124/// Calculates the size of a program stream
125std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
126 constexpr std::size_t start_offset = 10;
127 // This is the encoded version of BRA that jumps to itself. All Nvidia
128 // shaders end with one.
129 constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
130 constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
131 std::size_t offset = start_offset;
132 std::size_t size = start_offset * sizeof(u64);
133 while (offset < program.size()) {
134 const u64 instruction = program[offset];
135 if (!IsSchedInstruction(offset, start_offset)) {
136 if ((instruction & mask) == self_jumping_branch) {
137 // End on Maxwell's "nop" instruction
138 break;
139 }
140 if (instruction == 0) {
141 break;
142 }
143 }
144 size += sizeof(u64);
145 offset++;
146 }
147 // The last instruction is included in the program size
148 return std::min(size + sizeof(u64), program.size() * sizeof(u64));
149}
150
151/// Hashes one (or two) program streams 155/// Hashes one (or two) program streams
152u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, 156u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
153 const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { 157 const ProgramCode& code_b) {
154 if (size_a == 0) { 158 u64 unique_identifier = boost::hash_value(code);
155 size_a = CalculateProgramSize(code); 159 if (program_type == ProgramType::VertexA) {
156 } 160 // VertexA programs include two programs
157 u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); 161 boost::hash_combine(unique_identifier, boost::hash_value(code_b));
158 if (program_type != ProgramType::VertexA) {
159 return unique_identifier;
160 }
161 // VertexA programs include two programs
162
163 std::size_t seed = 0;
164 boost::hash_combine(seed, unique_identifier);
165
166 if (size_b == 0) {
167 size_b = CalculateProgramSize(code_b);
168 } 162 }
169 const u64 identifier_b = 163 return unique_identifier;
170 Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
171 boost::hash_combine(seed, identifier_b);
172 return static_cast<u64>(seed);
173} 164}
174 165
175/// Creates an unspecialized program from code streams 166/// Creates an unspecialized program from code streams
176GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, 167std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir,
177 ProgramCode program_code, ProgramCode program_code_b) { 168 const std::optional<ShaderIR>& ir_b) {
178 GLShader::ShaderSetup setup(program_code);
179 setup.program.size_a = CalculateProgramSize(program_code);
180 setup.program.size_b = 0;
181 if (program_type == ProgramType::VertexA) {
182 // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
183 // Conventional HW does not support this, so we combine VertexA and VertexB into one
184 // stage here.
185 setup.SetProgramB(program_code_b);
186 setup.program.size_b = CalculateProgramSize(program_code_b);
187 }
188 setup.program.unique_identifier = GetUniqueIdentifier(
189 program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
190
191 switch (program_type) { 169 switch (program_type) {
192 case ProgramType::VertexA: 170 case ProgramType::VertexA:
193 case ProgramType::VertexB: 171 case ProgramType::VertexB:
194 return GLShader::GenerateVertexShader(device, setup); 172 return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
195 case ProgramType::Geometry: 173 case ProgramType::Geometry:
196 return GLShader::GenerateGeometryShader(device, setup); 174 return GLShader::GenerateGeometryShader(device, ir);
197 case ProgramType::Fragment: 175 case ProgramType::Fragment:
198 return GLShader::GenerateFragmentShader(device, setup); 176 return GLShader::GenerateFragmentShader(device, ir);
199 case ProgramType::Compute: 177 case ProgramType::Compute:
200 return GLShader::GenerateComputeShader(device, setup); 178 return GLShader::GenerateComputeShader(device, ir);
201 default: 179 default:
202 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); 180 UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
203 return {}; 181 return {};
204 } 182 }
205} 183}
206 184
207CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, 185constexpr const char* GetProgramTypeName(ProgramType program_type) {
208 ProgramType program_type, const ProgramVariant& variant, 186 switch (program_type) {
209 bool hint_retrievable = false) { 187 case ProgramType::VertexA:
188 case ProgramType::VertexB:
189 return "VS";
190 case ProgramType::TessellationControl:
191 return "TCS";
192 case ProgramType::TessellationEval:
193 return "TES";
194 case ProgramType::Geometry:
195 return "GS";
196 case ProgramType::Fragment:
197 return "FS";
198 case ProgramType::Compute:
199 return "CS";
200 }
201 return "UNK";
202}
203
204Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) {
205 switch (program_type) {
206 case ProgramType::VertexA:
207 case ProgramType::VertexB:
208 return Tegra::Engines::ShaderType::Vertex;
209 case ProgramType::TessellationControl:
210 return Tegra::Engines::ShaderType::TesselationControl;
211 case ProgramType::TessellationEval:
212 return Tegra::Engines::ShaderType::TesselationEval;
213 case ProgramType::Geometry:
214 return Tegra::Engines::ShaderType::Geometry;
215 case ProgramType::Fragment:
216 return Tegra::Engines::ShaderType::Fragment;
217 case ProgramType::Compute:
218 return Tegra::Engines::ShaderType::Compute;
219 }
220 UNREACHABLE();
221 return {};
222}
223
224std::string GetShaderId(u64 unique_identifier, ProgramType program_type) {
225 return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier);
226}
227
228Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(
229 Core::System& system, ProgramType program_type) {
230 if (program_type == ProgramType::Compute) {
231 return system.GPU().KeplerCompute();
232 } else {
233 return system.GPU().Maxwell3D();
234 }
235}
236
237std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) {
238 return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type),
239 GetConstBufferEngineInterface(system, program_type));
240}
241
242void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
243 for (const auto& key : usage.keys) {
244 const auto [buffer, offset] = key.first;
245 locker.InsertKey(buffer, offset, key.second);
246 }
247 for (const auto& [offset, sampler] : usage.bound_samplers) {
248 locker.InsertBoundSampler(offset, sampler);
249 }
250 for (const auto& [key, sampler] : usage.bindless_samplers) {
251 const auto [buffer, offset] = key;
252 locker.InsertBindlessSampler(buffer, offset, sampler);
253 }
254}
255
256CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,
257 const ProgramCode& program_code, const ProgramCode& program_code_b,
258 const ProgramVariant& variant, ConstBufferLocker& locker,
259 bool hint_retrievable = false) {
260 LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type));
261
262 const bool is_compute = program_type == ProgramType::Compute;
263 const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
264 const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker);
265 std::optional<ShaderIR> ir_b;
266 if (!program_code_b.empty()) {
267 ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker);
268 }
269 const auto entries = GLShader::GetEntries(ir);
270
210 auto base_bindings{variant.base_bindings}; 271 auto base_bindings{variant.base_bindings};
211 const auto primitive_mode{variant.primitive_mode}; 272 const auto primitive_mode{variant.primitive_mode};
212 const auto texture_buffer_usage{variant.texture_buffer_usage}; 273 const auto texture_buffer_usage{variant.texture_buffer_usage};
213 274
214 std::string source = R"(#version 430 core 275 std::string source = fmt::format(R"(// {}
276#version 430 core
215#extension GL_ARB_separate_shader_objects : enable 277#extension GL_ARB_separate_shader_objects : enable
216#extension GL_ARB_shader_viewport_layer_array : enable 278#extension GL_ARB_shader_viewport_layer_array : enable
217#extension GL_EXT_shader_image_load_formatted : enable 279#extension GL_EXT_shader_image_load_formatted : enable
218#extension GL_NV_gpu_shader5 : enable 280#extension GL_NV_gpu_shader5 : enable
219#extension GL_NV_shader_thread_group : enable 281#extension GL_NV_shader_thread_group : enable
220#extension GL_NV_shader_thread_shuffle : enable 282#extension GL_NV_shader_thread_shuffle : enable
221)"; 283)",
222 if (program_type == ProgramType::Compute) { 284 GetShaderId(unique_identifier, program_type));
285 if (is_compute) {
223 source += "#extension GL_ARB_compute_variable_group_size : require\n"; 286 source += "#extension GL_ARB_compute_variable_group_size : require\n";
224 } 287 }
225 source += '\n'; 288 source += '\n';
226 289
227 if (program_type != ProgramType::Compute) { 290 if (!is_compute) {
228 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); 291 source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
229 } 292 }
230 293
@@ -268,7 +331,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
268 } 331 }
269 332
270 source += '\n'; 333 source += '\n';
271 source += code; 334 source += GenerateGLSL(device, program_type, ir, ir_b);
272 335
273 OGLShader shader; 336 OGLShader shader;
274 shader.Create(source.c_str(), GetShaderType(program_type)); 337 shader.Create(source.c_str(), GetShaderType(program_type));
@@ -278,85 +341,97 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
278 return program; 341 return program;
279} 342}
280 343
281std::set<GLenum> GetSupportedFormats() { 344std::unordered_set<GLenum> GetSupportedFormats() {
282 std::set<GLenum> supported_formats;
283
284 GLint num_formats{}; 345 GLint num_formats{};
285 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); 346 glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
286 347
287 std::vector<GLint> formats(num_formats); 348 std::vector<GLint> formats(num_formats);
288 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); 349 glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
289 350
290 for (const GLint format : formats) 351 std::unordered_set<GLenum> supported_formats;
352 for (const GLint format : formats) {
291 supported_formats.insert(static_cast<GLenum>(format)); 353 supported_formats.insert(static_cast<GLenum>(format));
354 }
292 return supported_formats; 355 return supported_formats;
293} 356}
294 357
295} // Anonymous namespace 358} // Anonymous namespace
296 359
297CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, 360CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
298 GLShader::ProgramResult result) 361 GLShader::ShaderEntries entries, ProgramCode program_code,
299 : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, 362 ProgramCode program_code_b)
300 unique_identifier{params.unique_identifier}, program_type{program_type}, 363 : RasterizerCacheObject{params.host_ptr}, system{params.system},
301 disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, 364 disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
302 entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} 365 unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries},
366 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {
367 if (!params.precompiled_variants) {
368 return;
369 }
370 for (const auto& pair : *params.precompiled_variants) {
371 auto locker = MakeLocker(system, program_type);
372 const auto& usage = pair->first;
373 FillLocker(*locker, usage);
374
375 std::unique_ptr<LockerVariant>* locker_variant = nullptr;
376 const auto it =
377 std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
378 return variant->locker->HasEqualKeys(*locker);
379 });
380 if (it == locker_variants.end()) {
381 locker_variant = &locker_variants.emplace_back();
382 *locker_variant = std::make_unique<LockerVariant>();
383 locker_variant->get()->locker = std::move(locker);
384 } else {
385 locker_variant = &*it;
386 }
387 locker_variant->get()->programs.emplace(usage.variant, pair->second);
388 }
389}
303 390
304Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 391Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
305 Maxwell::ShaderProgram program_type, 392 Maxwell::ShaderProgram program_type,
306 ProgramCode&& program_code, 393 ProgramCode program_code, ProgramCode program_code_b) {
307 ProgramCode&& program_code_b) {
308 const auto code_size{CalculateProgramSize(program_code)};
309 const auto code_size_b{CalculateProgramSize(program_code_b)};
310 auto result{
311 CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
312 if (result.first.empty()) {
313 // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
314 return {};
315 }
316
317 params.disk_cache.SaveRaw(ShaderDiskCacheRaw( 394 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
318 params.unique_identifier, GetProgramType(program_type), 395 params.unique_identifier, GetProgramType(program_type), program_code, program_code_b));
319 static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), 396
320 std::move(program_code), std::move(program_code_b))); 397 ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)));
321 398 const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
399 // TODO(Rodrigo): Handle VertexA shaders
400 // std::optional<ShaderIR> ir_b;
401 // if (!program_code_b.empty()) {
402 // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET);
403 // }
322 return std::shared_ptr<CachedShader>( 404 return std::shared_ptr<CachedShader>(
323 new CachedShader(params, GetProgramType(program_type), std::move(result))); 405 new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir),
406 std::move(program_code), std::move(program_code_b)));
324} 407}
325 408
326Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, 409Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
327 Maxwell::ShaderProgram program_type, 410 params.disk_cache.SaveRaw(
328 GLShader::ProgramResult result) { 411 ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code));
329 return std::shared_ptr<CachedShader>(
330 new CachedShader(params, GetProgramType(program_type), std::move(result)));
331}
332
333Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
334 auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
335
336 const auto code_size{CalculateProgramSize(code)};
337 params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
338 static_cast<u32>(code_size / sizeof(u64)), 0,
339 std::move(code), {}));
340 412
341 return std::shared_ptr<CachedShader>( 413 ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute);
342 new CachedShader(params, ProgramType::Compute, std::move(result))); 414 const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
415 return std::shared_ptr<CachedShader>(new CachedShader(
416 params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
343} 417}
344 418
345Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, 419Shader CachedShader::CreateFromCache(const ShaderParameters& params,
346 GLShader::ProgramResult result) { 420 const UnspecializedShader& unspecialized) {
347 return std::shared_ptr<CachedShader>( 421 return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type,
348 new CachedShader(params, ProgramType::Compute, std::move(result))); 422 unspecialized.entries, unspecialized.code,
423 unspecialized.code_b));
349} 424}
350 425
351std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { 426std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
352 const auto [entry, is_cache_miss] = programs.try_emplace(variant); 427 UpdateVariant();
428
429 const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant);
353 auto& program = entry->second; 430 auto& program = entry->second;
354 if (is_cache_miss) { 431 if (is_cache_miss) {
355 program = TryLoadProgram(variant); 432 program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b,
356 if (!program) { 433 variant, *curr_variant->locker);
357 program = SpecializeShader(code, entries, program_type, variant); 434 disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker));
358 disk_cache.SaveUsage(GetUsage(variant));
359 }
360 435
361 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); 436 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
362 } 437 }
@@ -372,18 +447,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
372 return {program->handle, base_bindings}; 447 return {program->handle, base_bindings};
373} 448}
374 449
375CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { 450void CachedShader::UpdateVariant() {
376 const auto found = precompiled_programs.find(GetUsage(variant)); 451 if (curr_variant && !curr_variant->locker->IsConsistent()) {
377 if (found == precompiled_programs.end()) { 452 curr_variant = nullptr;
378 return {}; 453 }
454 if (!curr_variant) {
455 for (auto& variant : locker_variants) {
456 if (variant->locker->IsConsistent()) {
457 curr_variant = variant.get();
458 }
459 }
460 }
461 if (!curr_variant) {
462 auto& new_variant = locker_variants.emplace_back();
463 new_variant = std::make_unique<LockerVariant>();
464 new_variant->locker = MakeLocker(system, program_type);
465 curr_variant = new_variant.get();
379 } 466 }
380 return found->second;
381} 467}
382 468
383ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { 469ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
470 const ConstBufferLocker& locker) const {
384 ShaderDiskCacheUsage usage; 471 ShaderDiskCacheUsage usage;
385 usage.unique_identifier = unique_identifier; 472 usage.unique_identifier = unique_identifier;
386 usage.variant = variant; 473 usage.variant = variant;
474 usage.keys = locker.GetKeys();
475 usage.bound_samplers = locker.GetBoundSamplers();
476 usage.bindless_samplers = locker.GetBindlessSamplers();
387 return usage; 477 return usage;
388} 478}
389 479
@@ -399,18 +489,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
399 return; 489 return;
400 } 490 }
401 const auto [raws, shader_usages] = *transferable; 491 const auto [raws, shader_usages] = *transferable;
402 492 if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
403 auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
404
405 const auto supported_formats{GetSupportedFormats()};
406 const auto unspecialized_shaders{
407 GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
408 if (stop_loading) {
409 return; 493 return;
410 } 494 }
411 495
412 // Track if precompiled cache was altered during loading to know if we have to serialize the 496 const auto dumps = disk_cache.LoadPrecompiled();
413 // virtual precompiled cache file back to the hard drive 497 const auto supported_formats = GetSupportedFormats();
498
499 // Track if precompiled cache was altered during loading to know if we have to
500 // serialize the virtual precompiled cache file back to the hard drive
414 bool precompiled_cache_altered = false; 501 bool precompiled_cache_altered = false;
415 502
416 // Inform the frontend about shader build initialization 503 // Inform the frontend about shader build initialization
@@ -433,9 +520,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
433 return; 520 return;
434 } 521 }
435 const auto& usage{shader_usages[i]}; 522 const auto& usage{shader_usages[i]};
436 LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
437 usage.unique_identifier, i, shader_usages.size());
438
439 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; 523 const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
440 const auto dump{dumps.find(usage)}; 524 const auto dump{dumps.find(usage)};
441 525
@@ -449,21 +533,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
449 } 533 }
450 } 534 }
451 if (!shader) { 535 if (!shader) {
452 shader = SpecializeShader(unspecialized.code, unspecialized.entries, 536 auto locker{MakeLocker(system, unspecialized.program_type)};
453 unspecialized.program_type, usage.variant, true); 537 FillLocker(*locker, usage);
538 shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type,
539 unspecialized.code, unspecialized.code_b, usage.variant,
540 *locker, true);
454 } 541 }
455 542
456 std::scoped_lock lock(mutex); 543 std::scoped_lock lock{mutex};
457 if (callback) { 544 if (callback) {
458 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, 545 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
459 shader_usages.size()); 546 shader_usages.size());
460 } 547 }
461 548
462 precompiled_programs.emplace(usage, std::move(shader)); 549 precompiled_programs.emplace(usage, std::move(shader));
550
551 // TODO(Rodrigo): Is there a better way to do this?
552 precompiled_variants[usage.unique_identifier].push_back(
553 precompiled_programs.find(usage));
463 } 554 }
464 }; 555 };
465 556
466 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; 557 const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
467 const std::size_t bucket_size{shader_usages.size() / num_workers}; 558 const std::size_t bucket_size{shader_usages.size() / num_workers};
468 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); 559 std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
469 std::vector<std::thread> threads(num_workers); 560 std::vector<std::thread> threads(num_workers);
@@ -483,7 +574,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
483 if (compilation_failed) { 574 if (compilation_failed) {
484 // Invalidate the precompiled cache if a shader dumped shader was rejected 575 // Invalidate the precompiled cache if a shader dumped shader was rejected
485 disk_cache.InvalidatePrecompiled(); 576 disk_cache.InvalidatePrecompiled();
486 dumps.clear();
487 precompiled_cache_altered = true; 577 precompiled_cache_altered = true;
488 return; 578 return;
489 } 579 }
@@ -491,8 +581,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
491 return; 581 return;
492 } 582 }
493 583
494 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before 584 // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
495 // precompiling them 585 // before precompiling them
496 586
497 for (std::size_t i = 0; i < shader_usages.size(); ++i) { 587 for (std::size_t i = 0; i < shader_usages.size(); ++i) {
498 const auto& usage{shader_usages[i]}; 588 const auto& usage{shader_usages[i]};
@@ -508,9 +598,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
508 } 598 }
509} 599}
510 600
511CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( 601const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
512 const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { 602 const auto it = precompiled_variants.find(unique_identifier);
603 return it == precompiled_variants.end() ? nullptr : &it->second;
604}
513 605
606CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
607 const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
514 if (supported_formats.find(dump.binary_format) == supported_formats.end()) { 608 if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
515 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); 609 LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
516 return {}; 610 return {};
@@ -532,56 +626,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
532 return shader; 626 return shader;
533} 627}
534 628
535std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( 629bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
536 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, 630 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
537 const std::vector<ShaderDiskCacheRaw>& raws, 631 const std::vector<ShaderDiskCacheRaw>& raws) {
538 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
539 std::unordered_map<u64, UnspecializedShader> unspecialized;
540
541 if (callback) { 632 if (callback) {
542 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); 633 callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
543 } 634 }
544 635
545 for (std::size_t i = 0; i < raws.size(); ++i) { 636 for (std::size_t i = 0; i < raws.size(); ++i) {
546 if (stop_loading) { 637 if (stop_loading) {
547 return {}; 638 return false;
548 } 639 }
549 const auto& raw{raws[i]}; 640 const auto& raw{raws[i]};
550 const u64 unique_identifier{raw.GetUniqueIdentifier()}; 641 const u64 unique_identifier{raw.GetUniqueIdentifier()};
551 const u64 calculated_hash{ 642 const u64 calculated_hash{
552 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; 643 GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
553 if (unique_identifier != calculated_hash) { 644 if (unique_identifier != calculated_hash) {
554 LOG_ERROR( 645 LOG_ERROR(Render_OpenGL,
555 Render_OpenGL, 646 "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
556 "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", 647 "removing shader cache",
557 raw.GetUniqueIdentifier(), calculated_hash); 648 raw.GetUniqueIdentifier(), calculated_hash);
558 disk_cache.InvalidateTransferable(); 649 disk_cache.InvalidateTransferable();
559 return {}; 650 return false;
560 } 651 }
561 652
562 GLShader::ProgramResult result; 653 const u32 main_offset =
563 if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { 654 raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
564 // If it's stored in the precompiled file, avoid decompiling it here 655 ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType()));
565 const auto& stored_decompiled{it->second}; 656 const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker);
566 result = {stored_decompiled.code, stored_decompiled.entries}; 657 // TODO(Rodrigo): Handle VertexA shaders
567 } else { 658 // std::optional<ShaderIR> ir_b;
568 // Otherwise decompile the shader at boot and save the result to the decompiled file 659 // if (raw.HasProgramA()) {
569 result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), 660 // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
570 raw.GetProgramCodeB()); 661 // }
571 disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); 662
572 } 663 UnspecializedShader unspecialized;
573 664 unspecialized.entries = GLShader::GetEntries(ir);
574 precompiled_shaders.insert({unique_identifier, result}); 665 unspecialized.program_type = raw.GetProgramType();
575 666 unspecialized.code = raw.GetProgramCode();
576 unspecialized.insert( 667 unspecialized.code_b = raw.GetProgramCodeB();
577 {raw.GetUniqueIdentifier(), 668 unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
578 {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
579 669
580 if (callback) { 670 if (callback) {
581 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); 671 callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
582 } 672 }
583 } 673 }
584 return unspecialized; 674 return true;
585} 675}
586 676
587Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 677Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -590,37 +680,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
590 } 680 }
591 681
592 auto& memory_manager{system.GPU().MemoryManager()}; 682 auto& memory_manager{system.GPU().MemoryManager()};
593 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 683 const GPUVAddr address{GetShaderAddress(system, program)};
594 684
595 // Look up shader in the cache based on address 685 // Look up shader in the cache based on address
596 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 686 const auto host_ptr{memory_manager.GetPointer(address)};
597 Shader shader{TryGet(host_ptr)}; 687 Shader shader{TryGet(host_ptr)};
598 if (shader) { 688 if (shader) {
599 return last_shaders[static_cast<std::size_t>(program)] = shader; 689 return last_shaders[static_cast<std::size_t>(program)] = shader;
600 } 690 }
601 691
602 // No shader found - create a new one 692 // No shader found - create a new one
603 ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; 693 ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
604 ProgramCode program_code_b; 694 ProgramCode code_b;
605 const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; 695 if (program == Maxwell::ShaderProgram::VertexA) {
606 if (is_program_a) { 696 const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
607 const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; 697 code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
608 program_code_b = GetShaderCode(memory_manager, program_addr_b, 698 }
609 memory_manager.GetPointer(program_addr_b)); 699
610 } 700 const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b);
611 701 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
612 const auto unique_identifier = 702 const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
613 GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); 703 const ShaderParameters params{system, disk_cache, precompiled_variants, device,
614 const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; 704 cpu_addr, host_ptr, unique_identifier};
615 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, 705
616 host_ptr, unique_identifier}; 706 const auto found = unspecialized_shaders.find(unique_identifier);
617 707 if (found == unspecialized_shaders.end()) {
618 const auto found = precompiled_shaders.find(unique_identifier); 708 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
619 if (found == precompiled_shaders.end()) { 709 std::move(code_b));
620 shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code),
621 std::move(program_code_b));
622 } else { 710 } else {
623 shader = CachedShader::CreateStageFromCache(params, program, found->second); 711 shader = CachedShader::CreateFromCache(params, found->second);
624 } 712 }
625 Register(shader); 713 Register(shader);
626 714
@@ -638,15 +726,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
638 // No kernel found - create a new one 726 // No kernel found - create a new one
639 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; 727 auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
640 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; 728 const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
729 const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
641 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; 730 const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
642 const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, 731 const ShaderParameters params{system, disk_cache, precompiled_variants, device,
643 host_ptr, unique_identifier}; 732 cpu_addr, host_ptr, unique_identifier};
644 733
645 const auto found = precompiled_shaders.find(unique_identifier); 734 const auto found = unspecialized_shaders.find(unique_identifier);
646 if (found == precompiled_shaders.end()) { 735 if (found == unspecialized_shaders.end()) {
647 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 736 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
648 } else { 737 } else {
649 kernel = CachedShader::CreateKernelFromCache(params, found->second); 738 kernel = CachedShader::CreateFromCache(params, found->second);
650 } 739 }
651 740
652 Register(kernel); 741 Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index de195cc5d..6bd7c9cf1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -8,9 +8,10 @@
8#include <atomic> 8#include <atomic>
9#include <bitset> 9#include <bitset>
10#include <memory> 10#include <memory>
11#include <set> 11#include <string>
12#include <tuple> 12#include <tuple>
13#include <unordered_map> 13#include <unordered_map>
14#include <unordered_set>
14#include <vector> 15#include <vector>
15 16
16#include <glad/glad.h> 17#include <glad/glad.h>
@@ -20,6 +21,8 @@
20#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
21#include "video_core/renderer_opengl/gl_shader_decompiler.h" 22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
22#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
24#include "video_core/shader/const_buffer_locker.h"
25#include "video_core/shader/shader_ir.h"
23 26
24namespace Core { 27namespace Core {
25class System; 28class System;
@@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>;
40using CachedProgram = std::shared_ptr<OGLProgram>; 43using CachedProgram = std::shared_ptr<OGLProgram>;
41using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
42using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; 45using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
43using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; 46using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
47
48struct UnspecializedShader {
49 GLShader::ShaderEntries entries;
50 ProgramType program_type;
51 ProgramCode code;
52 ProgramCode code_b;
53};
44 54
45struct ShaderParameters { 55struct ShaderParameters {
56 Core::System& system;
46 ShaderDiskCacheOpenGL& disk_cache; 57 ShaderDiskCacheOpenGL& disk_cache;
47 const PrecompiledPrograms& precompiled_programs; 58 const PrecompiledVariants* precompiled_variants;
48 const Device& device; 59 const Device& device;
49 VAddr cpu_addr; 60 VAddr cpu_addr;
50 u8* host_ptr; 61 u8* host_ptr;
@@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject {
55public: 66public:
56 static Shader CreateStageFromMemory(const ShaderParameters& params, 67 static Shader CreateStageFromMemory(const ShaderParameters& params,
57 Maxwell::ShaderProgram program_type, 68 Maxwell::ShaderProgram program_type,
58 ProgramCode&& program_code, ProgramCode&& program_code_b); 69 ProgramCode program_code, ProgramCode program_code_b);
59 70 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
60 static Shader CreateStageFromCache(const ShaderParameters& params,
61 Maxwell::ShaderProgram program_type,
62 GLShader::ProgramResult result);
63 71
64 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); 72 static Shader CreateFromCache(const ShaderParameters& params,
65 73 const UnspecializedShader& unspecialized);
66 static Shader CreateKernelFromCache(const ShaderParameters& params,
67 GLShader::ProgramResult result);
68 74
69 VAddr GetCpuAddr() const override { 75 VAddr GetCpuAddr() const override {
70 return cpu_addr; 76 return cpu_addr;
71 } 77 }
72 78
73 std::size_t GetSizeInBytes() const override { 79 std::size_t GetSizeInBytes() const override {
74 return shader_length; 80 return program_code.size() * sizeof(u64);
75 } 81 }
76 82
77 /// Gets the shader entries for the shader 83 /// Gets the shader entries for the shader
@@ -83,24 +89,36 @@ public:
83 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); 89 std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
84 90
85private: 91private:
92 struct LockerVariant {
93 std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
94 std::unordered_map<ProgramVariant, CachedProgram> programs;
95 };
96
86 explicit CachedShader(const ShaderParameters& params, ProgramType program_type, 97 explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
87 GLShader::ProgramResult result); 98 GLShader::ShaderEntries entries, ProgramCode program_code,
99 ProgramCode program_code_b);
88 100
89 CachedProgram TryLoadProgram(const ProgramVariant& variant) const; 101 void UpdateVariant();
90 102
91 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; 103 ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
104 const VideoCommon::Shader::ConstBufferLocker& locker) const;
105
106 Core::System& system;
107 ShaderDiskCacheOpenGL& disk_cache;
108 const Device& device;
92 109
93 VAddr cpu_addr{}; 110 VAddr cpu_addr{};
111
94 u64 unique_identifier{}; 112 u64 unique_identifier{};
95 ProgramType program_type{}; 113 ProgramType program_type{};
96 ShaderDiskCacheOpenGL& disk_cache;
97 const PrecompiledPrograms& precompiled_programs;
98 114
99 GLShader::ShaderEntries entries; 115 GLShader::ShaderEntries entries;
100 std::string code;
101 std::size_t shader_length{};
102 116
103 std::unordered_map<ProgramVariant, CachedProgram> programs; 117 ProgramCode program_code;
118 ProgramCode program_code_b;
119
120 LockerVariant* curr_variant = nullptr;
121 std::vector<std::unique_ptr<LockerVariant>> locker_variants;
104}; 122};
105 123
106class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 124class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -123,21 +141,26 @@ protected:
123 void FlushObjectInner(const Shader& object) override {} 141 void FlushObjectInner(const Shader& object) override {}
124 142
125private: 143private:
126 std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( 144 bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
127 const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, 145 const VideoCore::DiskResourceLoadCallback& callback,
128 const std::vector<ShaderDiskCacheRaw>& raws, 146 const std::vector<ShaderDiskCacheRaw>& raws);
129 const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
130 147
131 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, 148 CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
132 const std::set<GLenum>& supported_formats); 149 const std::unordered_set<GLenum>& supported_formats);
150
151 const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
133 152
134 Core::System& system; 153 Core::System& system;
135 Core::Frontend::EmuWindow& emu_window; 154 Core::Frontend::EmuWindow& emu_window;
136 const Device& device; 155 const Device& device;
156
137 ShaderDiskCacheOpenGL disk_cache; 157 ShaderDiskCacheOpenGL disk_cache;
138 158
139 PrecompiledShaders precompiled_shaders;
140 PrecompiledPrograms precompiled_programs; 159 PrecompiledPrograms precompiled_programs;
160 std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
161
162 std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
163
141 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 164 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
142}; 165};
143 166
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6a610a3bc..030550c53 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -415,27 +415,6 @@ public:
415 return code.GetResult(); 415 return code.GetResult();
416 } 416 }
417 417
418 ShaderEntries GetShaderEntries() const {
419 ShaderEntries entries;
420 for (const auto& cbuf : ir.GetConstantBuffers()) {
421 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
422 cbuf.first);
423 }
424 for (const auto& sampler : ir.GetSamplers()) {
425 entries.samplers.emplace_back(sampler);
426 }
427 for (const auto& [offset, image] : ir.GetImages()) {
428 entries.images.emplace_back(image);
429 }
430 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
431 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
432 usage.is_read, usage.is_written);
433 }
434 entries.clip_distances = ir.GetClipDistances();
435 entries.shader_length = ir.GetLength();
436 return entries;
437 }
438
439private: 418private:
440 friend class ASTDecompiler; 419 friend class ASTDecompiler;
441 friend class ExprDecompiler; 420 friend class ExprDecompiler;
@@ -1148,7 +1127,7 @@ private:
1148 for (const auto& variant : extras) { 1127 for (const auto& variant : extras) {
1149 if (const auto argument = std::get_if<TextureArgument>(&variant)) { 1128 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
1150 expr += GenerateTextureArgument(*argument); 1129 expr += GenerateTextureArgument(*argument);
1151 } else if (std::get_if<TextureAoffi>(&variant)) { 1130 } else if (std::holds_alternative<TextureAoffi>(variant)) {
1152 expr += GenerateTextureAoffi(meta->aoffi); 1131 expr += GenerateTextureAoffi(meta->aoffi);
1153 } else { 1132 } else {
1154 UNREACHABLE(); 1133 UNREACHABLE();
@@ -1158,8 +1137,8 @@ private:
1158 return expr + ')'; 1137 return expr + ')';
1159 } 1138 }
1160 1139
1161 std::string GenerateTextureArgument(TextureArgument argument) { 1140 std::string GenerateTextureArgument(const TextureArgument& argument) {
1162 const auto [type, operand] = argument; 1141 const auto& [type, operand] = argument;
1163 if (operand == nullptr) { 1142 if (operand == nullptr) {
1164 return {}; 1143 return {};
1165 } 1144 }
@@ -1235,7 +1214,7 @@ private:
1235 1214
1236 std::string BuildImageValues(Operation operation) { 1215 std::string BuildImageValues(Operation operation) {
1237 constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; 1216 constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
1238 const auto meta{std::get<MetaImage>(operation.GetMeta())}; 1217 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1239 1218
1240 const std::size_t values_count{meta.values.size()}; 1219 const std::size_t values_count{meta.values.size()};
1241 std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); 1220 std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
@@ -1780,14 +1759,14 @@ private:
1780 return {"0", Type::Int}; 1759 return {"0", Type::Int};
1781 } 1760 }
1782 1761
1783 const auto meta{std::get<MetaImage>(operation.GetMeta())}; 1762 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1784 return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), 1763 return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
1785 BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), 1764 BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
1786 Type::Uint}; 1765 Type::Uint};
1787 } 1766 }
1788 1767
1789 Expression ImageStore(Operation operation) { 1768 Expression ImageStore(Operation operation) {
1790 const auto meta{std::get<MetaImage>(operation.GetMeta())}; 1769 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1791 code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), 1770 code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
1792 BuildIntegerCoordinates(operation), BuildImageValues(operation)); 1771 BuildIntegerCoordinates(operation), BuildImageValues(operation));
1793 return {}; 1772 return {};
@@ -1795,7 +1774,7 @@ private:
1795 1774
1796 template <const std::string_view& opname> 1775 template <const std::string_view& opname>
1797 Expression AtomicImage(Operation operation) { 1776 Expression AtomicImage(Operation operation) {
1798 const auto meta{std::get<MetaImage>(operation.GetMeta())}; 1777 const auto& meta{std::get<MetaImage>(operation.GetMeta())};
1799 ASSERT(meta.values.size() == 1); 1778 ASSERT(meta.values.size() == 1);
1800 1779
1801 return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), 1780 return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
@@ -2246,7 +2225,7 @@ private:
2246 code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); 2225 code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
2247 } 2226 }
2248 2227
2249 std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { 2228 std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
2250 return fmt::format("{}_{}_{}", name, index, suffix); 2229 return fmt::format("{}_{}_{}", name, index, suffix);
2251 } 2230 }
2252 2231
@@ -2271,17 +2250,15 @@ private:
2271 ShaderWriter code; 2250 ShaderWriter code;
2272}; 2251};
2273 2252
2274static constexpr std::string_view flow_var = "flow_var_";
2275
2276std::string GetFlowVariable(u32 i) { 2253std::string GetFlowVariable(u32 i) {
2277 return fmt::format("{}{}", flow_var, i); 2254 return fmt::format("flow_var_{}", i);
2278} 2255}
2279 2256
2280class ExprDecompiler { 2257class ExprDecompiler {
2281public: 2258public:
2282 explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} 2259 explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
2283 2260
2284 void operator()(VideoCommon::Shader::ExprAnd& expr) { 2261 void operator()(const ExprAnd& expr) {
2285 inner += "( "; 2262 inner += "( ";
2286 std::visit(*this, *expr.operand1); 2263 std::visit(*this, *expr.operand1);
2287 inner += " && "; 2264 inner += " && ";
@@ -2289,7 +2266,7 @@ public:
2289 inner += ')'; 2266 inner += ')';
2290 } 2267 }
2291 2268
2292 void operator()(VideoCommon::Shader::ExprOr& expr) { 2269 void operator()(const ExprOr& expr) {
2293 inner += "( "; 2270 inner += "( ";
2294 std::visit(*this, *expr.operand1); 2271 std::visit(*this, *expr.operand1);
2295 inner += " || "; 2272 inner += " || ";
@@ -2297,17 +2274,17 @@ public:
2297 inner += ')'; 2274 inner += ')';
2298 } 2275 }
2299 2276
2300 void operator()(VideoCommon::Shader::ExprNot& expr) { 2277 void operator()(const ExprNot& expr) {
2301 inner += '!'; 2278 inner += '!';
2302 std::visit(*this, *expr.operand1); 2279 std::visit(*this, *expr.operand1);
2303 } 2280 }
2304 2281
2305 void operator()(VideoCommon::Shader::ExprPredicate& expr) { 2282 void operator()(const ExprPredicate& expr) {
2306 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); 2283 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
2307 inner += decomp.GetPredicate(pred); 2284 inner += decomp.GetPredicate(pred);
2308 } 2285 }
2309 2286
2310 void operator()(VideoCommon::Shader::ExprCondCode& expr) { 2287 void operator()(const ExprCondCode& expr) {
2311 const Node cc = decomp.ir.GetConditionCode(expr.cc); 2288 const Node cc = decomp.ir.GetConditionCode(expr.cc);
2312 std::string target; 2289 std::string target;
2313 2290
@@ -2316,10 +2293,13 @@ public:
2316 switch (index) { 2293 switch (index) {
2317 case Tegra::Shader::Pred::NeverExecute: 2294 case Tegra::Shader::Pred::NeverExecute:
2318 target = "false"; 2295 target = "false";
2296 break;
2319 case Tegra::Shader::Pred::UnusedIndex: 2297 case Tegra::Shader::Pred::UnusedIndex:
2320 target = "true"; 2298 target = "true";
2299 break;
2321 default: 2300 default:
2322 target = decomp.GetPredicate(index); 2301 target = decomp.GetPredicate(index);
2302 break;
2323 } 2303 }
2324 } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { 2304 } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
2325 target = decomp.GetInternalFlag(flag->GetFlag()); 2305 target = decomp.GetInternalFlag(flag->GetFlag());
@@ -2329,15 +2309,20 @@ public:
2329 inner += target; 2309 inner += target;
2330 } 2310 }
2331 2311
2332 void operator()(VideoCommon::Shader::ExprVar& expr) { 2312 void operator()(const ExprVar& expr) {
2333 inner += GetFlowVariable(expr.var_index); 2313 inner += GetFlowVariable(expr.var_index);
2334 } 2314 }
2335 2315
2336 void operator()(VideoCommon::Shader::ExprBoolean& expr) { 2316 void operator()(const ExprBoolean& expr) {
2337 inner += expr.value ? "true" : "false"; 2317 inner += expr.value ? "true" : "false";
2338 } 2318 }
2339 2319
2340 std::string& GetResult() { 2320 void operator()(VideoCommon::Shader::ExprGprEqual& expr) {
2321 inner +=
2322 "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
2323 }
2324
2325 const std::string& GetResult() const {
2341 return inner; 2326 return inner;
2342 } 2327 }
2343 2328
@@ -2350,7 +2335,7 @@ class ASTDecompiler {
2350public: 2335public:
2351 explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} 2336 explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
2352 2337
2353 void operator()(VideoCommon::Shader::ASTProgram& ast) { 2338 void operator()(const ASTProgram& ast) {
2354 ASTNode current = ast.nodes.GetFirst(); 2339 ASTNode current = ast.nodes.GetFirst();
2355 while (current) { 2340 while (current) {
2356 Visit(current); 2341 Visit(current);
@@ -2358,7 +2343,7 @@ public:
2358 } 2343 }
2359 } 2344 }
2360 2345
2361 void operator()(VideoCommon::Shader::ASTIfThen& ast) { 2346 void operator()(const ASTIfThen& ast) {
2362 ExprDecompiler expr_parser{decomp}; 2347 ExprDecompiler expr_parser{decomp};
2363 std::visit(expr_parser, *ast.condition); 2348 std::visit(expr_parser, *ast.condition);
2364 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); 2349 decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
@@ -2372,7 +2357,7 @@ public:
2372 decomp.code.AddLine("}}"); 2357 decomp.code.AddLine("}}");
2373 } 2358 }
2374 2359
2375 void operator()(VideoCommon::Shader::ASTIfElse& ast) { 2360 void operator()(const ASTIfElse& ast) {
2376 decomp.code.AddLine("else {{"); 2361 decomp.code.AddLine("else {{");
2377 decomp.code.scope++; 2362 decomp.code.scope++;
2378 ASTNode current = ast.nodes.GetFirst(); 2363 ASTNode current = ast.nodes.GetFirst();
@@ -2384,29 +2369,29 @@ public:
2384 decomp.code.AddLine("}}"); 2369 decomp.code.AddLine("}}");
2385 } 2370 }
2386 2371
2387 void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { 2372 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
2388 UNREACHABLE(); 2373 UNREACHABLE();
2389 } 2374 }
2390 2375
2391 void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { 2376 void operator()(const ASTBlockDecoded& ast) {
2392 decomp.VisitBlock(ast.nodes); 2377 decomp.VisitBlock(ast.nodes);
2393 } 2378 }
2394 2379
2395 void operator()(VideoCommon::Shader::ASTVarSet& ast) { 2380 void operator()(const ASTVarSet& ast) {
2396 ExprDecompiler expr_parser{decomp}; 2381 ExprDecompiler expr_parser{decomp};
2397 std::visit(expr_parser, *ast.condition); 2382 std::visit(expr_parser, *ast.condition);
2398 decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); 2383 decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
2399 } 2384 }
2400 2385
2401 void operator()(VideoCommon::Shader::ASTLabel& ast) { 2386 void operator()(const ASTLabel& ast) {
2402 decomp.code.AddLine("// Label_{}:", ast.index); 2387 decomp.code.AddLine("// Label_{}:", ast.index);
2403 } 2388 }
2404 2389
2405 void operator()(VideoCommon::Shader::ASTGoto& ast) { 2390 void operator()([[maybe_unused]] const ASTGoto& ast) {
2406 UNREACHABLE(); 2391 UNREACHABLE();
2407 } 2392 }
2408 2393
2409 void operator()(VideoCommon::Shader::ASTDoWhile& ast) { 2394 void operator()(const ASTDoWhile& ast) {
2410 ExprDecompiler expr_parser{decomp}; 2395 ExprDecompiler expr_parser{decomp};
2411 std::visit(expr_parser, *ast.condition); 2396 std::visit(expr_parser, *ast.condition);
2412 decomp.code.AddLine("do {{"); 2397 decomp.code.AddLine("do {{");
@@ -2420,7 +2405,7 @@ public:
2420 decomp.code.AddLine("}} while({});", expr_parser.GetResult()); 2405 decomp.code.AddLine("}} while({});", expr_parser.GetResult());
2421 } 2406 }
2422 2407
2423 void operator()(VideoCommon::Shader::ASTReturn& ast) { 2408 void operator()(const ASTReturn& ast) {
2424 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); 2409 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2425 if (!is_true) { 2410 if (!is_true) {
2426 ExprDecompiler expr_parser{decomp}; 2411 ExprDecompiler expr_parser{decomp};
@@ -2440,7 +2425,7 @@ public:
2440 } 2425 }
2441 } 2426 }
2442 2427
2443 void operator()(VideoCommon::Shader::ASTBreak& ast) { 2428 void operator()(const ASTBreak& ast) {
2444 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); 2429 const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
2445 if (!is_true) { 2430 if (!is_true) {
2446 ExprDecompiler expr_parser{decomp}; 2431 ExprDecompiler expr_parser{decomp};
@@ -2455,7 +2440,7 @@ public:
2455 } 2440 }
2456 } 2441 }
2457 2442
2458 void Visit(VideoCommon::Shader::ASTNode& node) { 2443 void Visit(const ASTNode& node) {
2459 std::visit(*this, *node->GetInnerData()); 2444 std::visit(*this, *node->GetInnerData());
2460 } 2445 }
2461 2446
@@ -2468,32 +2453,53 @@ void GLSLDecompiler::DecompileAST() {
2468 for (u32 i = 0; i < num_flow_variables; i++) { 2453 for (u32 i = 0; i < num_flow_variables; i++) {
2469 code.AddLine("bool {} = false;", GetFlowVariable(i)); 2454 code.AddLine("bool {} = false;", GetFlowVariable(i));
2470 } 2455 }
2456
2471 ASTDecompiler decompiler{*this}; 2457 ASTDecompiler decompiler{*this};
2472 VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); 2458 decompiler.Visit(ir.GetASTProgram());
2473 decompiler.Visit(program);
2474} 2459}
2475 2460
2476} // Anonymous namespace 2461} // Anonymous namespace
2477 2462
2463ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
2464 ShaderEntries entries;
2465 for (const auto& cbuf : ir.GetConstantBuffers()) {
2466 entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
2467 cbuf.first);
2468 }
2469 for (const auto& sampler : ir.GetSamplers()) {
2470 entries.samplers.emplace_back(sampler);
2471 }
2472 for (const auto& [offset, image] : ir.GetImages()) {
2473 entries.images.emplace_back(image);
2474 }
2475 for (const auto& [base, usage] : ir.GetGlobalMemory()) {
2476 entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
2477 usage.is_written);
2478 }
2479 entries.clip_distances = ir.GetClipDistances();
2480 entries.shader_length = ir.GetLength();
2481 return entries;
2482}
2483
2478std::string GetCommonDeclarations() { 2484std::string GetCommonDeclarations() {
2479 return fmt::format( 2485 return R"(#define ftoi floatBitsToInt
2480 "#define ftoi floatBitsToInt\n" 2486#define ftou floatBitsToUint
2481 "#define ftou floatBitsToUint\n" 2487#define itof intBitsToFloat
2482 "#define itof intBitsToFloat\n" 2488#define utof uintBitsToFloat
2483 "#define utof uintBitsToFloat\n\n" 2489
2484 "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" 2490bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
2485 " bvec2 is_nan1 = isnan(pair1);\n" 2491 bvec2 is_nan1 = isnan(pair1);
2486 " bvec2 is_nan2 = isnan(pair2);\n" 2492 bvec2 is_nan2 = isnan(pair2);
2487 " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " 2493 return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
2488 "is_nan2.y);\n" 2494}
2489 "}}\n\n"); 2495)";
2490} 2496}
2491 2497
2492ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, 2498std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
2493 const std::string& suffix) { 2499 const std::string& suffix) {
2494 GLSLDecompiler decompiler(device, ir, stage, suffix); 2500 GLSLDecompiler decompiler(device, ir, stage, suffix);
2495 decompiler.Decompile(); 2501 decompiler.Decompile();
2496 return {decompiler.GetResult(), decompiler.GetShaderEntries()}; 2502 return decompiler.GetResult();
2497} 2503}
2498 2504
2499} // namespace OpenGL::GLShader 2505} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e538dc001..fead2a51e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -34,10 +34,7 @@ enum class ProgramType : u32 {
34 34
35namespace OpenGL::GLShader { 35namespace OpenGL::GLShader {
36 36
37struct ShaderEntries;
38
39using Maxwell = Tegra::Engines::Maxwell3D::Regs; 37using Maxwell = Tegra::Engines::Maxwell3D::Regs;
40using ProgramResult = std::pair<std::string, ShaderEntries>;
41using SamplerEntry = VideoCommon::Shader::Sampler; 38using SamplerEntry = VideoCommon::Shader::Sampler;
42using ImageEntry = VideoCommon::Shader::Image; 39using ImageEntry = VideoCommon::Shader::Image;
43 40
@@ -93,9 +90,11 @@ struct ShaderEntries {
93 std::size_t shader_length{}; 90 std::size_t shader_length{};
94}; 91};
95 92
93ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
94
96std::string GetCommonDeclarations(); 95std::string GetCommonDeclarations();
97 96
98ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, 97std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
99 ProgramType stage, const std::string& suffix); 98 ProgramType stage, const std::string& suffix);
100 99
101} // namespace OpenGL::GLShader 100} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 74cc33476..184a565e6 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -22,6 +22,29 @@
22 22
23namespace OpenGL { 23namespace OpenGL {
24 24
25using VideoCommon::Shader::BindlessSamplerMap;
26using VideoCommon::Shader::BoundSamplerMap;
27using VideoCommon::Shader::KeyMap;
28
29namespace {
30
31struct ConstBufferKey {
32 u32 cbuf;
33 u32 offset;
34 u32 value;
35};
36
37struct BoundSamplerKey {
38 u32 offset;
39 Tegra::Engines::SamplerDescriptor sampler;
40};
41
42struct BindlessSamplerKey {
43 u32 cbuf;
44 u32 offset;
45 Tegra::Engines::SamplerDescriptor sampler;
46};
47
25using ShaderCacheVersionHash = std::array<u8, 64>; 48using ShaderCacheVersionHash = std::array<u8, 64>;
26 49
27enum class TransferableEntryKind : u32 { 50enum class TransferableEntryKind : u32 {
@@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 {
29 Usage, 52 Usage,
30}; 53};
31 54
32enum class PrecompiledEntryKind : u32 { 55constexpr u32 NativeVersion = 5;
33 Decompiled,
34 Dump,
35};
36
37constexpr u32 NativeVersion = 4;
38 56
39// Making sure sizes doesn't change by accident 57// Making sure sizes doesn't change by accident
40static_assert(sizeof(BaseBindings) == 16); 58static_assert(sizeof(BaseBindings) == 16);
41static_assert(sizeof(ShaderDiskCacheUsage) == 40);
42
43namespace {
44 59
45ShaderCacheVersionHash GetShaderCacheVersionHash() { 60ShaderCacheVersionHash GetShaderCacheVersionHash() {
46 ShaderCacheVersionHash hash{}; 61 ShaderCacheVersionHash hash{};
@@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
49 return hash; 64 return hash;
50} 65}
51 66
52} // namespace 67} // Anonymous namespace
53 68
54ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, 69ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
55 u32 program_code_size, u32 program_code_size_b,
56 ProgramCode program_code, ProgramCode program_code_b) 70 ProgramCode program_code, ProgramCode program_code_b)
57 : unique_identifier{unique_identifier}, program_type{program_type}, 71 : unique_identifier{unique_identifier}, program_type{program_type},
58 program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
59 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} 72 program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
60 73
61ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; 74ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
@@ -90,15 +103,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
90bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { 103bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
91 if (file.WriteObject(unique_identifier) != 1 || 104 if (file.WriteObject(unique_identifier) != 1 ||
92 file.WriteObject(static_cast<u32>(program_type)) != 1 || 105 file.WriteObject(static_cast<u32>(program_type)) != 1 ||
93 file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { 106 file.WriteObject(static_cast<u32>(program_code.size())) != 1 ||
107 file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) {
94 return false; 108 return false;
95 } 109 }
96 110
97 if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) 111 if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size())
98 return false; 112 return false;
99 113
100 if (HasProgramA() && 114 if (HasProgramA() &&
101 file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { 115 file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) {
102 return false; 116 return false;
103 } 117 }
104 return true; 118 return true;
@@ -127,13 +141,13 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
127 u32 version{}; 141 u32 version{};
128 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { 142 if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
129 LOG_ERROR(Render_OpenGL, 143 LOG_ERROR(Render_OpenGL,
130 "Failed to get transferable cache version for title id={} - skipping", 144 "Failed to get transferable cache version for title id={}, skipping",
131 GetTitleID()); 145 GetTitleID());
132 return {}; 146 return {};
133 } 147 }
134 148
135 if (version < NativeVersion) { 149 if (version < NativeVersion) {
136 LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); 150 LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
137 file.Close(); 151 file.Close();
138 InvalidateTransferable(); 152 InvalidateTransferable();
139 is_usable = true; 153 is_usable = true;
@@ -141,17 +155,18 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
141 } 155 }
142 if (version > NativeVersion) { 156 if (version > NativeVersion) {
143 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " 157 LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
144 "of the emulator - skipping"); 158 "of the emulator, skipping");
145 return {}; 159 return {};
146 } 160 }
147 161
148 // Version is valid, load the shaders 162 // Version is valid, load the shaders
163 constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping";
149 std::vector<ShaderDiskCacheRaw> raws; 164 std::vector<ShaderDiskCacheRaw> raws;
150 std::vector<ShaderDiskCacheUsage> usages; 165 std::vector<ShaderDiskCacheUsage> usages;
151 while (file.Tell() < file.GetSize()) { 166 while (file.Tell() < file.GetSize()) {
152 TransferableEntryKind kind{}; 167 TransferableEntryKind kind{};
153 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { 168 if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
154 LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); 169 LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
155 return {}; 170 return {};
156 } 171 }
157 172
@@ -159,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
159 case TransferableEntryKind::Raw: { 174 case TransferableEntryKind::Raw: {
160 ShaderDiskCacheRaw entry; 175 ShaderDiskCacheRaw entry;
161 if (!entry.Load(file)) { 176 if (!entry.Load(file)) {
162 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); 177 LOG_ERROR(Render_OpenGL, error_loading);
163 return {}; 178 return {};
164 } 179 }
165 transferable.insert({entry.GetUniqueIdentifier(), {}}); 180 transferable.insert({entry.GetUniqueIdentifier(), {}});
@@ -167,16 +182,45 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
167 break; 182 break;
168 } 183 }
169 case TransferableEntryKind::Usage: { 184 case TransferableEntryKind::Usage: {
170 ShaderDiskCacheUsage usage{}; 185 ShaderDiskCacheUsage usage;
171 if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) { 186
172 LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping"); 187 u32 num_keys{};
188 u32 num_bound_samplers{};
189 u32 num_bindless_samplers{};
190 if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
191 file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
192 file.ReadArray(&num_bound_samplers, 1) != 1 ||
193 file.ReadArray(&num_bindless_samplers, 1) != 1) {
194 LOG_ERROR(Render_OpenGL, error_loading);
173 return {}; 195 return {};
174 } 196 }
197
198 std::vector<ConstBufferKey> keys(num_keys);
199 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
200 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
201 if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
202 file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
203 bound_samplers.size() ||
204 file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
205 bindless_samplers.size()) {
206 LOG_ERROR(Render_OpenGL, error_loading);
207 return {};
208 }
209 for (const auto& key : keys) {
210 usage.keys.insert({{key.cbuf, key.offset}, key.value});
211 }
212 for (const auto& key : bound_samplers) {
213 usage.bound_samplers.emplace(key.offset, key.sampler);
214 }
215 for (const auto& key : bindless_samplers) {
216 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
217 }
218
175 usages.push_back(std::move(usage)); 219 usages.push_back(std::move(usage));
176 break; 220 break;
177 } 221 }
178 default: 222 default:
179 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", 223 LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
180 static_cast<u32>(kind)); 224 static_cast<u32>(kind));
181 return {}; 225 return {};
182 } 226 }
@@ -186,13 +230,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
186 return {{std::move(raws), std::move(usages)}}; 230 return {{std::move(raws), std::move(usages)}};
187} 231}
188 232
189std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> 233std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
190ShaderDiskCacheOpenGL::LoadPrecompiled() { 234ShaderDiskCacheOpenGL::LoadPrecompiled() {
191 if (!is_usable) { 235 if (!is_usable) {
192 return {}; 236 return {};
193 } 237 }
194 238
195 FileUtil::IOFile file(GetPrecompiledPath(), "rb"); 239 std::string path = GetPrecompiledPath();
240 FileUtil::IOFile file(path, "rb");
196 if (!file.IsOpen()) { 241 if (!file.IsOpen()) {
197 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", 242 LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
198 GetTitleID()); 243 GetTitleID());
@@ -202,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
202 const auto result = LoadPrecompiledFile(file); 247 const auto result = LoadPrecompiledFile(file);
203 if (!result) { 248 if (!result) {
204 LOG_INFO(Render_OpenGL, 249 LOG_INFO(Render_OpenGL,
205 "Failed to load precompiled cache for game with title id={} - removing", 250 "Failed to load precompiled cache for game with title id={}, removing",
206 GetTitleID()); 251 GetTitleID());
207 file.Close(); 252 file.Close();
208 InvalidatePrecompiled(); 253 InvalidatePrecompiled();
@@ -211,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
211 return *result; 256 return *result;
212} 257}
213 258
214std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> 259std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
215ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { 260ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
216 // Read compressed file from disk and decompress to virtual precompiled cache file 261 // Read compressed file from disk and decompress to virtual precompiled cache file
217 std::vector<u8> compressed(file.GetSize()); 262 std::vector<u8> compressed(file.GetSize());
@@ -231,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
231 return {}; 276 return {};
232 } 277 }
233 278
234 std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
235 ShaderDumpsMap dumps; 279 ShaderDumpsMap dumps;
236 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { 280 while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
237 PrecompiledEntryKind kind{}; 281 u32 num_keys{};
238 if (!LoadObjectFromPrecompiled(kind)) { 282 u32 num_bound_samplers{};
283 u32 num_bindless_samplers{};
284 ShaderDiskCacheUsage usage;
285 if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
286 !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
287 !LoadObjectFromPrecompiled(num_bound_samplers) ||
288 !LoadObjectFromPrecompiled(num_bindless_samplers)) {
239 return {}; 289 return {};
240 } 290 }
241 291 std::vector<ConstBufferKey> keys(num_keys);
242 switch (kind) { 292 std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
243 case PrecompiledEntryKind::Decompiled: { 293 std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
244 u64 unique_identifier{}; 294 if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
245 if (!LoadObjectFromPrecompiled(unique_identifier)) { 295 !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
246 return {}; 296 bound_samplers.size() ||
247 } 297 !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
248 298 bindless_samplers.size()) {
249 auto entry = LoadDecompiledEntry();
250 if (!entry) {
251 return {};
252 }
253 decompiled.insert({unique_identifier, std::move(*entry)});
254 break;
255 }
256 case PrecompiledEntryKind::Dump: {
257 ShaderDiskCacheUsage usage;
258 if (!LoadObjectFromPrecompiled(usage)) {
259 return {};
260 }
261
262 ShaderDiskCacheDump dump;
263 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
264 return {};
265 }
266
267 u32 binary_length{};
268 if (!LoadObjectFromPrecompiled(binary_length)) {
269 return {};
270 }
271
272 dump.binary.resize(binary_length);
273 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
274 return {};
275 }
276
277 dumps.insert({usage, dump});
278 break;
279 }
280 default:
281 return {}; 299 return {};
282 } 300 }
283 } 301 for (const auto& key : keys) {
284 return {{decompiled, dumps}}; 302 usage.keys.insert({{key.cbuf, key.offset}, key.value});
285}
286
287std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
288 u32 code_size{};
289 if (!LoadObjectFromPrecompiled(code_size)) {
290 return {};
291 }
292
293 std::string code(code_size, '\0');
294 if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
295 return {};
296 }
297
298 ShaderDiskCacheDecompiled entry;
299 entry.code = std::move(code);
300
301 u32 const_buffers_count{};
302 if (!LoadObjectFromPrecompiled(const_buffers_count)) {
303 return {};
304 }
305
306 for (u32 i = 0; i < const_buffers_count; ++i) {
307 u32 max_offset{};
308 u32 index{};
309 bool is_indirect{};
310 if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
311 !LoadObjectFromPrecompiled(is_indirect)) {
312 return {};
313 } 303 }
314 entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); 304 for (const auto& key : bound_samplers) {
315 } 305 usage.bound_samplers.emplace(key.offset, key.sampler);
316
317 u32 samplers_count{};
318 if (!LoadObjectFromPrecompiled(samplers_count)) {
319 return {};
320 }
321
322 for (u32 i = 0; i < samplers_count; ++i) {
323 u64 offset{};
324 u64 index{};
325 u32 type{};
326 bool is_array{};
327 bool is_shadow{};
328 bool is_bindless{};
329 if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
330 !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
331 !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
332 return {};
333 } 306 }
334 entry.entries.samplers.emplace_back( 307 for (const auto& key : bindless_samplers) {
335 static_cast<std::size_t>(offset), static_cast<std::size_t>(index), 308 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
336 static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
337 }
338
339 u32 images_count{};
340 if (!LoadObjectFromPrecompiled(images_count)) {
341 return {};
342 }
343 for (u32 i = 0; i < images_count; ++i) {
344 u64 offset{};
345 u64 index{};
346 u32 type{};
347 u8 is_bindless{};
348 u8 is_written{};
349 u8 is_read{};
350 u8 is_atomic{};
351 if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
352 !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
353 !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
354 !LoadObjectFromPrecompiled(is_atomic)) {
355 return {};
356 } 309 }
357 entry.entries.images.emplace_back(
358 static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
359 static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
360 is_read != 0, is_atomic != 0);
361 }
362 310
363 u32 global_memory_count{}; 311 ShaderDiskCacheDump dump;
364 if (!LoadObjectFromPrecompiled(global_memory_count)) { 312 if (!LoadObjectFromPrecompiled(dump.binary_format)) {
365 return {};
366 }
367 for (u32 i = 0; i < global_memory_count; ++i) {
368 u32 cbuf_index{};
369 u32 cbuf_offset{};
370 bool is_read{};
371 bool is_written{};
372 if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
373 !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
374 return {}; 313 return {};
375 } 314 }
376 entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
377 is_written);
378 }
379 315
380 for (auto& clip_distance : entry.entries.clip_distances) { 316 u32 binary_length{};
381 if (!LoadObjectFromPrecompiled(clip_distance)) { 317 if (!LoadObjectFromPrecompiled(binary_length)) {
382 return {}; 318 return {};
383 } 319 }
384 }
385
386 u64 shader_length{};
387 if (!LoadObjectFromPrecompiled(shader_length)) {
388 return {};
389 }
390 entry.entries.shader_length = static_cast<std::size_t>(shader_length);
391
392 return entry;
393}
394
395bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
396 const GLShader::ShaderEntries& entries) {
397 if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
398 !SaveObjectToPrecompiled(unique_identifier) ||
399 !SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
400 !SaveArrayToPrecompiled(code.data(), code.size())) {
401 return false;
402 }
403
404 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
405 return false;
406 }
407 for (const auto& cbuf : entries.const_buffers) {
408 if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
409 !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
410 !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
411 return false;
412 }
413 }
414
415 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
416 return false;
417 }
418 for (const auto& sampler : entries.samplers) {
419 if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
420 !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
421 !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
422 !SaveObjectToPrecompiled(sampler.IsArray()) ||
423 !SaveObjectToPrecompiled(sampler.IsShadow()) ||
424 !SaveObjectToPrecompiled(sampler.IsBindless())) {
425 return false;
426 }
427 }
428
429 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
430 return false;
431 }
432 for (const auto& image : entries.images) {
433 if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
434 !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
435 !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
436 !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
437 !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
438 !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
439 !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) {
440 return false;
441 }
442 }
443 320
444 if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { 321 dump.binary.resize(binary_length);
445 return false; 322 if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
446 } 323 return {};
447 for (const auto& gmem : entries.global_memory_entries) {
448 if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
449 !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
450 !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
451 return false;
452 }
453 }
454
455 for (const bool clip_distance : entries.clip_distances) {
456 if (!SaveObjectToPrecompiled(clip_distance)) {
457 return false;
458 } 324 }
459 }
460 325
461 if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { 326 dumps.emplace(std::move(usage), dump);
462 return false;
463 } 327 }
464 328 return dumps;
465 return true;
466} 329}
467 330
468void ShaderDiskCacheOpenGL::InvalidateTransferable() { 331void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -494,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
494 } 357 }
495 358
496 FileUtil::IOFile file = AppendTransferableFile(); 359 FileUtil::IOFile file = AppendTransferableFile();
497 if (!file.IsOpen()) 360 if (!file.IsOpen()) {
498 return; 361 return;
362 }
499 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { 363 if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
500 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); 364 LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
501 file.Close(); 365 file.Close();
502 InvalidateTransferable(); 366 InvalidateTransferable();
503 return; 367 return;
@@ -523,29 +387,39 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
523 FileUtil::IOFile file = AppendTransferableFile(); 387 FileUtil::IOFile file = AppendTransferableFile();
524 if (!file.IsOpen()) 388 if (!file.IsOpen())
525 return; 389 return;
526 390 const auto Close = [&] {
527 if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) { 391 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
528 LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
529 file.Close(); 392 file.Close();
530 InvalidateTransferable(); 393 InvalidateTransferable();
531 return; 394 };
532 }
533}
534 395
535void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, 396 if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
536 const GLShader::ShaderEntries& entries) { 397 file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
537 if (!is_usable) { 398 file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
399 file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
400 file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
401 Close();
538 return; 402 return;
539 } 403 }
540 404 for (const auto& [pair, value] : usage.keys) {
541 if (precompiled_cache_virtual_file.GetSize() == 0) { 405 const auto [cbuf, offset] = pair;
542 SavePrecompiledHeaderToVirtualPrecompiledCache(); 406 if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
407 Close();
408 return;
409 }
543 } 410 }
544 411 for (const auto& [offset, sampler] : usage.bound_samplers) {
545 if (!SaveDecompiledFile(unique_identifier, code, entries)) { 412 if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
546 LOG_ERROR(Render_OpenGL, 413 Close();
547 "Failed to save decompiled entry to the precompiled file - removing"); 414 return;
548 InvalidatePrecompiled(); 415 }
416 }
417 for (const auto& [pair, sampler] : usage.bindless_samplers) {
418 const auto [cbuf, offset] = pair;
419 if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
420 Close();
421 return;
422 }
549 } 423 }
550} 424}
551 425
@@ -554,6 +428,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
554 return; 428 return;
555 } 429 }
556 430
431 // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
432 // when writing the dump. This should be done the moment I get access to write to the virtual
433 // file.
434 if (precompiled_cache_virtual_file.GetSize() == 0) {
435 SavePrecompiledHeaderToVirtualPrecompiledCache();
436 }
437
557 GLint binary_length{}; 438 GLint binary_length{};
558 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); 439 glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
559 440
@@ -561,21 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
561 std::vector<u8> binary(binary_length); 442 std::vector<u8> binary(binary_length);
562 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); 443 glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
563 444
564 if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || 445 const auto Close = [&] {
565 !SaveObjectToPrecompiled(usage) || 446 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
566 !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
567 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
568 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
569 LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
570 usage.unique_identifier); 447 usage.unique_identifier);
571 InvalidatePrecompiled(); 448 InvalidatePrecompiled();
449 };
450
451 if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
452 !SaveObjectToPrecompiled(usage.variant) ||
453 !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
454 !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
455 !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
456 Close();
572 return; 457 return;
573 } 458 }
459 for (const auto& [pair, value] : usage.keys) {
460 const auto [cbuf, offset] = pair;
461 if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
462 Close();
463 return;
464 }
465 }
466 for (const auto& [offset, sampler] : usage.bound_samplers) {
467 if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
468 Close();
469 return;
470 }
471 }
472 for (const auto& [pair, sampler] : usage.bindless_samplers) {
473 const auto [cbuf, offset] = pair;
474 if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
475 Close();
476 return;
477 }
478 }
479 if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
480 !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
481 !SaveArrayToPrecompiled(binary.data(), binary.size())) {
482 Close();
483 }
574} 484}
575 485
576FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { 486FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
577 if (!EnsureDirectories()) 487 if (!EnsureDirectories()) {
578 return {}; 488 return {};
489 }
579 490
580 const auto transferable_path{GetTransferablePath()}; 491 const auto transferable_path{GetTransferablePath()};
581 const bool existed = FileUtil::Exists(transferable_path); 492 const bool existed = FileUtil::Exists(transferable_path);
@@ -607,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
607 518
608void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { 519void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
609 precompiled_cache_virtual_file_offset = 0; 520 precompiled_cache_virtual_file_offset = 0;
610 const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); 521 const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
611 const std::vector<u8>& compressed = 522 const std::vector<u8> compressed =
612 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); 523 Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
613 524
614 const auto precompiled_path{GetPrecompiledPath()}; 525 const auto precompiled_path{GetPrecompiledPath()};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 9595bd71b..db23ada93 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -8,6 +8,7 @@
8#include <optional> 8#include <optional>
9#include <string> 9#include <string>
10#include <tuple> 10#include <tuple>
11#include <type_traits>
11#include <unordered_map> 12#include <unordered_map>
12#include <unordered_set> 13#include <unordered_set>
13#include <utility> 14#include <utility>
@@ -19,6 +20,7 @@
19#include "common/common_types.h" 20#include "common/common_types.h"
20#include "core/file_sys/vfs_vector.h" 21#include "core/file_sys/vfs_vector.h"
21#include "video_core/renderer_opengl/gl_shader_gen.h" 22#include "video_core/renderer_opengl/gl_shader_gen.h"
23#include "video_core/shader/const_buffer_locker.h"
22 24
23namespace Core { 25namespace Core {
24class System; 26class System;
@@ -53,6 +55,7 @@ struct BaseBindings {
53 return !operator==(rhs); 55 return !operator==(rhs);
54 } 56 }
55}; 57};
58static_assert(std::is_trivially_copyable_v<BaseBindings>);
56 59
57/// Describes the different variants a single program can be compiled. 60/// Describes the different variants a single program can be compiled.
58struct ProgramVariant { 61struct ProgramVariant {
@@ -70,13 +73,20 @@ struct ProgramVariant {
70 } 73 }
71}; 74};
72 75
76static_assert(std::is_trivially_copyable_v<ProgramVariant>);
77
73/// Describes how a shader is used. 78/// Describes how a shader is used.
74struct ShaderDiskCacheUsage { 79struct ShaderDiskCacheUsage {
75 u64 unique_identifier{}; 80 u64 unique_identifier{};
76 ProgramVariant variant; 81 ProgramVariant variant;
82 VideoCommon::Shader::KeyMap keys;
83 VideoCommon::Shader::BoundSamplerMap bound_samplers;
84 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
77 85
78 bool operator==(const ShaderDiskCacheUsage& rhs) const { 86 bool operator==(const ShaderDiskCacheUsage& rhs) const {
79 return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); 87 return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
88 std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
89 rhs.bindless_samplers);
80 } 90 }
81 91
82 bool operator!=(const ShaderDiskCacheUsage& rhs) const { 92 bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -123,8 +133,7 @@ namespace OpenGL {
123class ShaderDiskCacheRaw { 133class ShaderDiskCacheRaw {
124public: 134public:
125 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, 135 explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
126 u32 program_code_size, u32 program_code_size_b, 136 ProgramCode program_code, ProgramCode program_code_b = {});
127 ProgramCode program_code, ProgramCode program_code_b);
128 ShaderDiskCacheRaw(); 137 ShaderDiskCacheRaw();
129 ~ShaderDiskCacheRaw(); 138 ~ShaderDiskCacheRaw();
130 139
@@ -155,22 +164,14 @@ public:
155private: 164private:
156 u64 unique_identifier{}; 165 u64 unique_identifier{};
157 ProgramType program_type{}; 166 ProgramType program_type{};
158 u32 program_code_size{};
159 u32 program_code_size_b{};
160 167
161 ProgramCode program_code; 168 ProgramCode program_code;
162 ProgramCode program_code_b; 169 ProgramCode program_code_b;
163}; 170};
164 171
165/// Contains decompiled data from a shader
166struct ShaderDiskCacheDecompiled {
167 std::string code;
168 GLShader::ShaderEntries entries;
169};
170
171/// Contains an OpenGL dumped binary program 172/// Contains an OpenGL dumped binary program
172struct ShaderDiskCacheDump { 173struct ShaderDiskCacheDump {
173 GLenum binary_format; 174 GLenum binary_format{};
174 std::vector<u8> binary; 175 std::vector<u8> binary;
175}; 176};
176 177
@@ -184,9 +185,7 @@ public:
184 LoadTransferable(); 185 LoadTransferable();
185 186
186 /// Loads current game's precompiled cache. Invalidates on failure. 187 /// Loads current game's precompiled cache. Invalidates on failure.
187 std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, 188 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
188 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
189 LoadPrecompiled();
190 189
191 /// Removes the transferable (and precompiled) cache file. 190 /// Removes the transferable (and precompiled) cache file.
192 void InvalidateTransferable(); 191 void InvalidateTransferable();
@@ -200,10 +199,6 @@ public:
200 /// Saves shader usage to the transferable file. Does not check for collisions. 199 /// Saves shader usage to the transferable file. Does not check for collisions.
201 void SaveUsage(const ShaderDiskCacheUsage& usage); 200 void SaveUsage(const ShaderDiskCacheUsage& usage);
202 201
203 /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
204 void SaveDecompiled(u64 unique_identifier, const std::string& code,
205 const GLShader::ShaderEntries& entries);
206
207 /// Saves a dump entry to the precompiled file. Does not check for collisions. 202 /// Saves a dump entry to the precompiled file. Does not check for collisions.
208 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); 203 void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
209 204
@@ -212,18 +207,9 @@ public:
212 207
213private: 208private:
214 /// Loads the transferable cache. Returns empty on failure. 209 /// Loads the transferable cache. Returns empty on failure.
215 std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, 210 std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
216 std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
217 LoadPrecompiledFile(FileUtil::IOFile& file); 211 LoadPrecompiledFile(FileUtil::IOFile& file);
218 212
219 /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
220 /// failure.
221 std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
222
223 /// Saves a decompiled entry to the passed file. Returns true on success.
224 bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
225 const GLShader::ShaderEntries& entries);
226
227 /// Opens current game's transferable file and write it's header if it doesn't exist 213 /// Opens current game's transferable file and write it's header if it doesn't exist
228 FileUtil::IOFile AppendTransferableFile() const; 214 FileUtil::IOFile AppendTransferableFile() const;
229 215
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index b5a43e79e..0e22eede9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -16,17 +16,8 @@ using VideoCommon::Shader::CompilerSettings;
16using VideoCommon::Shader::ProgramCode; 16using VideoCommon::Shader::ProgramCode;
17using VideoCommon::Shader::ShaderIR; 17using VideoCommon::Shader::ShaderIR;
18 18
19static constexpr u32 PROGRAM_OFFSET = 10; 19std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
20static constexpr u32 COMPUTE_OFFSET = 0; 20 std::string out = GetCommonDeclarations();
21
22static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true};
23
24ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
25 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
26
27 std::string out = "// Shader Unique Id: VS" + id + "\n\n";
28 out += GetCommonDeclarations();
29
30 out += R"( 21 out += R"(
31layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { 22layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
32 vec4 viewport_flip; 23 vec4 viewport_flip;
@@ -34,17 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
34}; 25};
35 26
36)"; 27)";
37 28 const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB;
38 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); 29 out += Decompile(device, ir, stage, "vertex");
39 const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; 30 if (ir_b) {
40 ProgramResult program = Decompile(device, program_ir, stage, "vertex"); 31 out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b");
41 out += program.first;
42
43 if (setup.IsDualProgram()) {
44 const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b,
45 settings);
46 ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
47 out += program_b.first;
48 } 32 }
49 33
50 out += R"( 34 out += R"(
@@ -52,7 +36,7 @@ void main() {
52 execute_vertex(); 36 execute_vertex();
53)"; 37)";
54 38
55 if (setup.IsDualProgram()) { 39 if (ir_b) {
56 out += " execute_vertex_b();"; 40 out += " execute_vertex_b();";
57 } 41 }
58 42
@@ -66,17 +50,13 @@ void main() {
66 // Viewport can be flipped, which is unsupported by glViewport 50 // Viewport can be flipped, which is unsupported by glViewport
67 gl_Position.xy *= viewport_flip.xy; 51 gl_Position.xy *= viewport_flip.xy;
68 } 52 }
69})"; 53}
70 54)";
71 return {std::move(out), std::move(program.second)}; 55 return out;
72} 56}
73 57
74ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { 58std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
75 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 59 std::string out = GetCommonDeclarations();
76
77 std::string out = "// Shader Unique Id: GS" + id + "\n\n";
78 out += GetCommonDeclarations();
79
80 out += R"( 60 out += R"(
81layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { 61layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
82 vec4 viewport_flip; 62 vec4 viewport_flip;
@@ -84,25 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
84}; 64};
85 65
86)"; 66)";
87 67 out += Decompile(device, ir, ProgramType::Geometry, "geometry");
88 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
89 ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
90 out += program.first;
91 68
92 out += R"( 69 out += R"(
93void main() { 70void main() {
94 execute_geometry(); 71 execute_geometry();
95};)"; 72}
96 73)";
97 return {std::move(out), std::move(program.second)}; 74 return out;
98} 75}
99 76
100ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { 77std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
101 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 78 std::string out = GetCommonDeclarations();
102
103 std::string out = "// Shader Unique Id: FS" + id + "\n\n";
104 out += GetCommonDeclarations();
105
106 out += R"( 79 out += R"(
107layout (location = 0) out vec4 FragColor0; 80layout (location = 0) out vec4 FragColor0;
108layout (location = 1) out vec4 FragColor1; 81layout (location = 1) out vec4 FragColor1;
@@ -119,36 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
119}; 92};
120 93
121)"; 94)";
122 95 out += Decompile(device, ir, ProgramType::Fragment, "fragment");
123 const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
124 ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
125 out += program.first;
126 96
127 out += R"( 97 out += R"(
128void main() { 98void main() {
129 execute_fragment(); 99 execute_fragment();
130} 100}
131
132)"; 101)";
133 return {std::move(out), std::move(program.second)}; 102 return out;
134} 103}
135 104
136ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { 105std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
137 const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); 106 std::string out = GetCommonDeclarations();
138 107 out += Decompile(device, ir, ProgramType::Compute, "compute");
139 std::string out = "// Shader Unique Id: CS" + id + "\n\n";
140 out += GetCommonDeclarations();
141
142 const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings);
143 ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
144 out += program.first;
145
146 out += R"( 108 out += R"(
147void main() { 109void main() {
148 execute_compute(); 110 execute_compute();
149} 111}
150)"; 112)";
151 return {std::move(out), std::move(program.second)}; 113 return out;
152} 114}
153 115
154} // namespace OpenGL::GLShader 116} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3833e88ab..cba2be9f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -17,44 +17,18 @@ class Device;
17namespace OpenGL::GLShader { 17namespace OpenGL::GLShader {
18 18
19using VideoCommon::Shader::ProgramCode; 19using VideoCommon::Shader::ProgramCode;
20 20using VideoCommon::Shader::ShaderIR;
21struct ShaderSetup {
22 explicit ShaderSetup(ProgramCode program_code) {
23 program.code = std::move(program_code);
24 }
25
26 struct {
27 ProgramCode code;
28 ProgramCode code_b; // Used for dual vertex shaders
29 u64 unique_identifier;
30 std::size_t size_a;
31 std::size_t size_b;
32 } program;
33
34 /// Used in scenarios where we have a dual vertex shaders
35 void SetProgramB(ProgramCode program_b) {
36 program.code_b = std::move(program_b);
37 has_program_b = true;
38 }
39
40 bool IsDualProgram() const {
41 return has_program_b;
42 }
43
44private:
45 bool has_program_b{};
46};
47 21
48/// Generates the GLSL vertex shader program source code for the given VS program 22/// Generates the GLSL vertex shader program source code for the given VS program
49ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); 23std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
50 24
51/// Generates the GLSL geometry shader program source code for the given GS program 25/// Generates the GLSL geometry shader program source code for the given GS program
52ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); 26std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
53 27
54/// Generates the GLSL fragment shader program source code for the given FS program 28/// Generates the GLSL fragment shader program source code for the given FS program
55ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); 29std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
56 30
57/// Generates the GLSL compute shader program source code for the given CS program 31/// Generates the GLSL compute shader program source code for the given CS program
58ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); 32std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
59 33
60} // namespace OpenGL::GLShader 34} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 173b76c4e..2f9bfd7e4 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -111,7 +111,8 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
111 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, 111 {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
112 true}, // DXT45_SRGB 112 true}, // DXT45_SRGB
113 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, 113 {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
114 true}, // BC7U_SRGB 114 true}, // BC7U_SRGB
115 {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, ComponentType::UNorm, false}, // R4G4B4A4U
115 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB 116 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
116 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB 117 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
117 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB 118 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
@@ -120,6 +121,16 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
120 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB 121 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
121 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 122 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
122 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB 123 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
124 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6
125 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6_SRGB
126 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10
127 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10_SRGB
128 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12
129 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12_SRGB
130 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6
131 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6_SRGB
132 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5
133 {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5_SRGB
123 134
124 // Depth formats 135 // Depth formats
125 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F 136 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1e6ef66ab..4bbd17b12 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
102RendererOpenGL::~RendererOpenGL() = default; 102RendererOpenGL::~RendererOpenGL() = default;
103 103
104void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { 104void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
105 system.GetPerfStats().EndSystemFrame();
106
107 // Maintain the rasterizer's state as a priority 105 // Maintain the rasterizer's state as a priority
108 OpenGLState prev_state = OpenGLState::GetCurState(); 106 OpenGLState prev_state = OpenGLState::GetCurState();
109 state.AllDirty(); 107 state.AllDirty();
@@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
135 133
136 render_window.PollEvents(); 134 render_window.PollEvents();
137 135
138 system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
139 system.GetPerfStats().BeginSystemFrame();
140
141 // Restore the rasterizer state 136 // Restore the rasterizer state
142 prev_state.AllDirty(); 137 prev_state.AllDirty();
143 prev_state.Apply(); 138 prev_state.Apply();
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 8bcd04221..42cf068b6 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1648,32 +1648,32 @@ class ExprDecompiler {
1648public: 1648public:
1649 explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} 1649 explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
1650 1650
1651 Id operator()(VideoCommon::Shader::ExprAnd& expr) { 1651 Id operator()(const ExprAnd& expr) {
1652 const Id type_def = decomp.GetTypeDefinition(Type::Bool); 1652 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
1653 const Id op1 = Visit(expr.operand1); 1653 const Id op1 = Visit(expr.operand1);
1654 const Id op2 = Visit(expr.operand2); 1654 const Id op2 = Visit(expr.operand2);
1655 return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2)); 1655 return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2));
1656 } 1656 }
1657 1657
1658 Id operator()(VideoCommon::Shader::ExprOr& expr) { 1658 Id operator()(const ExprOr& expr) {
1659 const Id type_def = decomp.GetTypeDefinition(Type::Bool); 1659 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
1660 const Id op1 = Visit(expr.operand1); 1660 const Id op1 = Visit(expr.operand1);
1661 const Id op2 = Visit(expr.operand2); 1661 const Id op2 = Visit(expr.operand2);
1662 return decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2)); 1662 return decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2));
1663 } 1663 }
1664 1664
1665 Id operator()(VideoCommon::Shader::ExprNot& expr) { 1665 Id operator()(const ExprNot& expr) {
1666 const Id type_def = decomp.GetTypeDefinition(Type::Bool); 1666 const Id type_def = decomp.GetTypeDefinition(Type::Bool);
1667 const Id op1 = Visit(expr.operand1); 1667 const Id op1 = Visit(expr.operand1);
1668 return decomp.Emit(decomp.OpLogicalNot(type_def, op1)); 1668 return decomp.Emit(decomp.OpLogicalNot(type_def, op1));
1669 } 1669 }
1670 1670
1671 Id operator()(VideoCommon::Shader::ExprPredicate& expr) { 1671 Id operator()(const ExprPredicate& expr) {
1672 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); 1672 const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
1673 return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred))); 1673 return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)));
1674 } 1674 }
1675 1675
1676 Id operator()(VideoCommon::Shader::ExprCondCode& expr) { 1676 Id operator()(const ExprCondCode& expr) {
1677 const Node cc = decomp.ir.GetConditionCode(expr.cc); 1677 const Node cc = decomp.ir.GetConditionCode(expr.cc);
1678 Id target; 1678 Id target;
1679 1679
@@ -1682,10 +1682,13 @@ public:
1682 switch (index) { 1682 switch (index) {
1683 case Tegra::Shader::Pred::NeverExecute: 1683 case Tegra::Shader::Pred::NeverExecute:
1684 target = decomp.v_false; 1684 target = decomp.v_false;
1685 break;
1685 case Tegra::Shader::Pred::UnusedIndex: 1686 case Tegra::Shader::Pred::UnusedIndex:
1686 target = decomp.v_true; 1687 target = decomp.v_true;
1688 break;
1687 default: 1689 default:
1688 target = decomp.predicates.at(index); 1690 target = decomp.predicates.at(index);
1691 break;
1689 } 1692 }
1690 } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { 1693 } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
1691 target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag())); 1694 target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
@@ -1693,15 +1696,22 @@ public:
1693 return decomp.Emit(decomp.OpLoad(decomp.t_bool, target)); 1696 return decomp.Emit(decomp.OpLoad(decomp.t_bool, target));
1694 } 1697 }
1695 1698
1696 Id operator()(VideoCommon::Shader::ExprVar& expr) { 1699 Id operator()(const ExprVar& expr) {
1697 return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index))); 1700 return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)));
1698 } 1701 }
1699 1702
1700 Id operator()(VideoCommon::Shader::ExprBoolean& expr) { 1703 Id operator()(const ExprBoolean& expr) {
1701 return expr.value ? decomp.v_true : decomp.v_false; 1704 return expr.value ? decomp.v_true : decomp.v_false;
1702 } 1705 }
1703 1706
1704 Id Visit(VideoCommon::Shader::Expr& node) { 1707 Id operator()(const ExprGprEqual& expr) {
1708 const Id target = decomp.Constant(decomp.t_uint, expr.value);
1709 const Id gpr = decomp.BitcastTo<Type::Uint>(
1710 decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr))));
1711 return decomp.Emit(decomp.OpLogicalEqual(decomp.t_uint, gpr, target));
1712 }
1713
1714 Id Visit(const Expr& node) {
1705 return std::visit(*this, *node); 1715 return std::visit(*this, *node);
1706 } 1716 }
1707 1717
@@ -1713,7 +1723,7 @@ class ASTDecompiler {
1713public: 1723public:
1714 explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} 1724 explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
1715 1725
1716 void operator()(VideoCommon::Shader::ASTProgram& ast) { 1726 void operator()(const ASTProgram& ast) {
1717 ASTNode current = ast.nodes.GetFirst(); 1727 ASTNode current = ast.nodes.GetFirst();
1718 while (current) { 1728 while (current) {
1719 Visit(current); 1729 Visit(current);
@@ -1721,7 +1731,7 @@ public:
1721 } 1731 }
1722 } 1732 }
1723 1733
1724 void operator()(VideoCommon::Shader::ASTIfThen& ast) { 1734 void operator()(const ASTIfThen& ast) {
1725 ExprDecompiler expr_parser{decomp}; 1735 ExprDecompiler expr_parser{decomp};
1726 const Id condition = expr_parser.Visit(ast.condition); 1736 const Id condition = expr_parser.Visit(ast.condition);
1727 const Id then_label = decomp.OpLabel(); 1737 const Id then_label = decomp.OpLabel();
@@ -1738,33 +1748,33 @@ public:
1738 decomp.Emit(endif_label); 1748 decomp.Emit(endif_label);
1739 } 1749 }
1740 1750
1741 void operator()(VideoCommon::Shader::ASTIfElse& ast) { 1751 void operator()([[maybe_unused]] const ASTIfElse& ast) {
1742 UNREACHABLE(); 1752 UNREACHABLE();
1743 } 1753 }
1744 1754
1745 void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { 1755 void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
1746 UNREACHABLE(); 1756 UNREACHABLE();
1747 } 1757 }
1748 1758
1749 void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { 1759 void operator()(const ASTBlockDecoded& ast) {
1750 decomp.VisitBasicBlock(ast.nodes); 1760 decomp.VisitBasicBlock(ast.nodes);
1751 } 1761 }
1752 1762
1753 void operator()(VideoCommon::Shader::ASTVarSet& ast) { 1763 void operator()(const ASTVarSet& ast) {
1754 ExprDecompiler expr_parser{decomp}; 1764 ExprDecompiler expr_parser{decomp};
1755 const Id condition = expr_parser.Visit(ast.condition); 1765 const Id condition = expr_parser.Visit(ast.condition);
1756 decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition)); 1766 decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition));
1757 } 1767 }
1758 1768
1759 void operator()(VideoCommon::Shader::ASTLabel& ast) { 1769 void operator()([[maybe_unused]] const ASTLabel& ast) {
1760 // Do nothing 1770 // Do nothing
1761 } 1771 }
1762 1772
1763 void operator()(VideoCommon::Shader::ASTGoto& ast) { 1773 void operator()([[maybe_unused]] const ASTGoto& ast) {
1764 UNREACHABLE(); 1774 UNREACHABLE();
1765 } 1775 }
1766 1776
1767 void operator()(VideoCommon::Shader::ASTDoWhile& ast) { 1777 void operator()(const ASTDoWhile& ast) {
1768 const Id loop_label = decomp.OpLabel(); 1778 const Id loop_label = decomp.OpLabel();
1769 const Id endloop_label = decomp.OpLabel(); 1779 const Id endloop_label = decomp.OpLabel();
1770 const Id loop_start_block = decomp.OpLabel(); 1780 const Id loop_start_block = decomp.OpLabel();
@@ -1787,7 +1797,7 @@ public:
1787 decomp.Emit(endloop_label); 1797 decomp.Emit(endloop_label);
1788 } 1798 }
1789 1799
1790 void operator()(VideoCommon::Shader::ASTReturn& ast) { 1800 void operator()(const ASTReturn& ast) {
1791 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { 1801 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
1792 ExprDecompiler expr_parser{decomp}; 1802 ExprDecompiler expr_parser{decomp};
1793 const Id condition = expr_parser.Visit(ast.condition); 1803 const Id condition = expr_parser.Visit(ast.condition);
@@ -1817,7 +1827,7 @@ public:
1817 } 1827 }
1818 } 1828 }
1819 1829
1820 void operator()(VideoCommon::Shader::ASTBreak& ast) { 1830 void operator()(const ASTBreak& ast) {
1821 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { 1831 if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) {
1822 ExprDecompiler expr_parser{decomp}; 1832 ExprDecompiler expr_parser{decomp};
1823 const Id condition = expr_parser.Visit(ast.condition); 1833 const Id condition = expr_parser.Visit(ast.condition);
@@ -1837,7 +1847,7 @@ public:
1837 } 1847 }
1838 } 1848 }
1839 1849
1840 void Visit(VideoCommon::Shader::ASTNode& node) { 1850 void Visit(const ASTNode& node) {
1841 std::visit(*this, *node->GetInnerData()); 1851 std::visit(*this, *node->GetInnerData());
1842 } 1852 }
1843 1853
@@ -1853,9 +1863,11 @@ void SPIRVDecompiler::DecompileAST() {
1853 Name(id, fmt::format("flow_var_{}", i)); 1863 Name(id, fmt::format("flow_var_{}", i));
1854 flow_variables.emplace(i, AddGlobalVariable(id)); 1864 flow_variables.emplace(i, AddGlobalVariable(id));
1855 } 1865 }
1866
1867 const ASTNode program = ir.GetASTProgram();
1856 ASTDecompiler decompiler{*this}; 1868 ASTDecompiler decompiler{*this};
1857 VideoCommon::Shader::ASTNode program = ir.GetASTProgram();
1858 decompiler.Visit(program); 1869 decompiler.Visit(program);
1870
1859 const Id next_block = OpLabel(); 1871 const Id next_block = OpLabel();
1860 Emit(OpBranch(next_block)); 1872 Emit(OpBranch(next_block));
1861 Emit(next_block); 1873 Emit(next_block);
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
index 436d45f4b..3f96d9076 100644
--- a/src/video_core/shader/ast.cpp
+++ b/src/video_core/shader/ast.cpp
@@ -3,6 +3,9 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <string> 5#include <string>
6#include <string_view>
7
8#include <fmt/format.h>
6 9
7#include "common/assert.h" 10#include "common/assert.h"
8#include "common/common_types.h" 11#include "common/common_types.h"
@@ -225,11 +228,16 @@ public:
225 inner += expr.value ? "true" : "false"; 228 inner += expr.value ? "true" : "false";
226 } 229 }
227 230
231 void operator()(const ExprGprEqual& expr) {
232 inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')';
233 }
234
228 const std::string& GetResult() const { 235 const std::string& GetResult() const {
229 return inner; 236 return inner;
230 } 237 }
231 238
232 std::string inner{}; 239private:
240 std::string inner;
233}; 241};
234 242
235class ASTPrinter { 243class ASTPrinter {
@@ -249,7 +257,7 @@ public:
249 void operator()(const ASTIfThen& ast) { 257 void operator()(const ASTIfThen& ast) {
250 ExprPrinter expr_parser{}; 258 ExprPrinter expr_parser{};
251 std::visit(expr_parser, *ast.condition); 259 std::visit(expr_parser, *ast.condition);
252 inner += Ident() + "if (" + expr_parser.GetResult() + ") {\n"; 260 inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
253 scope++; 261 scope++;
254 ASTNode current = ast.nodes.GetFirst(); 262 ASTNode current = ast.nodes.GetFirst();
255 while (current) { 263 while (current) {
@@ -257,11 +265,13 @@ public:
257 current = current->GetNext(); 265 current = current->GetNext();
258 } 266 }
259 scope--; 267 scope--;
260 inner += Ident() + "}\n"; 268 inner += fmt::format("{}}}\n", Indent());
261 } 269 }
262 270
263 void operator()(const ASTIfElse& ast) { 271 void operator()(const ASTIfElse& ast) {
264 inner += Ident() + "else {\n"; 272 inner += Indent();
273 inner += "else {\n";
274
265 scope++; 275 scope++;
266 ASTNode current = ast.nodes.GetFirst(); 276 ASTNode current = ast.nodes.GetFirst();
267 while (current) { 277 while (current) {
@@ -269,40 +279,41 @@ public:
269 current = current->GetNext(); 279 current = current->GetNext();
270 } 280 }
271 scope--; 281 scope--;
272 inner += Ident() + "}\n"; 282
283 inner += Indent();
284 inner += "}\n";
273 } 285 }
274 286
275 void operator()(const ASTBlockEncoded& ast) { 287 void operator()(const ASTBlockEncoded& ast) {
276 inner += Ident() + "Block(" + std::to_string(ast.start) + ", " + std::to_string(ast.end) + 288 inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
277 ");\n";
278 } 289 }
279 290
280 void operator()(const ASTBlockDecoded& ast) { 291 void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
281 inner += Ident() + "Block;\n"; 292 inner += Indent();
293 inner += "Block;\n";
282 } 294 }
283 295
284 void operator()(const ASTVarSet& ast) { 296 void operator()(const ASTVarSet& ast) {
285 ExprPrinter expr_parser{}; 297 ExprPrinter expr_parser{};
286 std::visit(expr_parser, *ast.condition); 298 std::visit(expr_parser, *ast.condition);
287 inner += 299 inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
288 Ident() + "V" + std::to_string(ast.index) + " := " + expr_parser.GetResult() + ";\n";
289 } 300 }
290 301
291 void operator()(const ASTLabel& ast) { 302 void operator()(const ASTLabel& ast) {
292 inner += "Label_" + std::to_string(ast.index) + ":\n"; 303 inner += fmt::format("Label_{}:\n", ast.index);
293 } 304 }
294 305
295 void operator()(const ASTGoto& ast) { 306 void operator()(const ASTGoto& ast) {
296 ExprPrinter expr_parser{}; 307 ExprPrinter expr_parser{};
297 std::visit(expr_parser, *ast.condition); 308 std::visit(expr_parser, *ast.condition);
298 inner += Ident() + "(" + expr_parser.GetResult() + ") -> goto Label_" + 309 inner +=
299 std::to_string(ast.label) + ";\n"; 310 fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
300 } 311 }
301 312
302 void operator()(const ASTDoWhile& ast) { 313 void operator()(const ASTDoWhile& ast) {
303 ExprPrinter expr_parser{}; 314 ExprPrinter expr_parser{};
304 std::visit(expr_parser, *ast.condition); 315 std::visit(expr_parser, *ast.condition);
305 inner += Ident() + "do {\n"; 316 inner += fmt::format("{}do {{\n", Indent());
306 scope++; 317 scope++;
307 ASTNode current = ast.nodes.GetFirst(); 318 ASTNode current = ast.nodes.GetFirst();
308 while (current) { 319 while (current) {
@@ -310,32 +321,23 @@ public:
310 current = current->GetNext(); 321 current = current->GetNext();
311 } 322 }
312 scope--; 323 scope--;
313 inner += Ident() + "} while (" + expr_parser.GetResult() + ");\n"; 324 inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
314 } 325 }
315 326
316 void operator()(const ASTReturn& ast) { 327 void operator()(const ASTReturn& ast) {
317 ExprPrinter expr_parser{}; 328 ExprPrinter expr_parser{};
318 std::visit(expr_parser, *ast.condition); 329 std::visit(expr_parser, *ast.condition);
319 inner += Ident() + "(" + expr_parser.GetResult() + ") -> " + 330 inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
320 (ast.kills ? "discard" : "exit") + ";\n"; 331 ast.kills ? "discard" : "exit");
321 } 332 }
322 333
323 void operator()(const ASTBreak& ast) { 334 void operator()(const ASTBreak& ast) {
324 ExprPrinter expr_parser{}; 335 ExprPrinter expr_parser{};
325 std::visit(expr_parser, *ast.condition); 336 std::visit(expr_parser, *ast.condition);
326 inner += Ident() + "(" + expr_parser.GetResult() + ") -> break;\n"; 337 inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
327 } 338 }
328 339
329 std::string& Ident() { 340 void Visit(const ASTNode& node) {
330 if (memo_scope == scope) {
331 return tabs_memo;
332 }
333 tabs_memo = tabs.substr(0, scope * 2);
334 memo_scope = scope;
335 return tabs_memo;
336 }
337
338 void Visit(ASTNode& node) {
339 std::visit(*this, *node->GetInnerData()); 341 std::visit(*this, *node->GetInnerData());
340 } 342 }
341 343
@@ -344,16 +346,29 @@ public:
344 } 346 }
345 347
346private: 348private:
349 std::string_view Indent() {
350 if (space_segment_scope == scope) {
351 return space_segment;
352 }
353
354 // Ensure that we don't exceed our view.
355 ASSERT(scope * 2 < spaces.size());
356
357 space_segment = spaces.substr(0, scope * 2);
358 space_segment_scope = scope;
359 return space_segment;
360 }
361
347 std::string inner{}; 362 std::string inner{};
348 u32 scope{}; 363 std::string_view space_segment;
349 364
350 std::string tabs_memo{}; 365 u32 scope{};
351 u32 memo_scope{}; 366 u32 space_segment_scope{};
352 367
353 static constexpr std::string_view tabs{" "}; 368 static constexpr std::string_view spaces{" "};
354}; 369};
355 370
356std::string ASTManager::Print() { 371std::string ASTManager::Print() const {
357 ASTPrinter printer{}; 372 ASTPrinter printer{};
358 printer.Visit(main_node); 373 printer.Visit(main_node);
359 return printer.GetResult(); 374 return printer.GetResult();
@@ -549,13 +564,13 @@ bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) co
549 return min->GetParent() == max->GetParent(); 564 return min->GetParent() == max->GetParent();
550} 565}
551 566
552void ASTManager::ShowCurrentState(std::string_view state) { 567void ASTManager::ShowCurrentState(std::string_view state) const {
553 LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); 568 LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
554 SanityCheck(); 569 SanityCheck();
555} 570}
556 571
557void ASTManager::SanityCheck() { 572void ASTManager::SanityCheck() const {
558 for (auto& label : labels) { 573 for (const auto& label : labels) {
559 if (!label->GetParent()) { 574 if (!label->GetParent()) {
560 LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); 575 LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
561 } 576 }
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index d7bf11821..a2f0044ba 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -328,13 +328,13 @@ public:
328 328
329 void InsertReturn(Expr condition, bool kills); 329 void InsertReturn(Expr condition, bool kills);
330 330
331 std::string Print(); 331 std::string Print() const;
332 332
333 void Decompile(); 333 void Decompile();
334 334
335 void ShowCurrentState(std::string_view state); 335 void ShowCurrentState(std::string_view state) const;
336 336
337 void SanityCheck(); 337 void SanityCheck() const;
338 338
339 void Clear(); 339 void Clear();
340 340
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
new file mode 100644
index 000000000..fe467608e
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -0,0 +1,110 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <memory>
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/maxwell_3d.h"
12#include "video_core/shader/const_buffer_locker.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Engines::SamplerDescriptor;
17
18ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
19 : stage{shader_stage} {}
20
21ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
22 Tegra::Engines::ConstBufferEngineInterface& engine)
23 : stage{shader_stage}, engine{&engine} {}
24
25ConstBufferLocker::~ConstBufferLocker() = default;
26
27std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
28 const std::pair<u32, u32> key = {buffer, offset};
29 const auto iter = keys.find(key);
30 if (iter != keys.end()) {
31 return iter->second;
32 }
33 if (!engine) {
34 return std::nullopt;
35 }
36 const u32 value = engine->AccessConstBuffer32(stage, buffer, offset);
37 keys.emplace(key, value);
38 return value;
39}
40
41std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
42 const u32 key = offset;
43 const auto iter = bound_samplers.find(key);
44 if (iter != bound_samplers.end()) {
45 return iter->second;
46 }
47 if (!engine) {
48 return std::nullopt;
49 }
50 const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset);
51 bound_samplers.emplace(key, value);
52 return value;
53}
54
55std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
56 u32 buffer, u32 offset) {
57 const std::pair key = {buffer, offset};
58 const auto iter = bindless_samplers.find(key);
59 if (iter != bindless_samplers.end()) {
60 return iter->second;
61 }
62 if (!engine) {
63 return std::nullopt;
64 }
65 const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset);
66 bindless_samplers.emplace(key, value);
67 return value;
68}
69
70void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
71 keys.insert_or_assign({buffer, offset}, value);
72}
73
74void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
75 bound_samplers.insert_or_assign(offset, sampler);
76}
77
78void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
79 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
80}
81
82bool ConstBufferLocker::IsConsistent() const {
83 if (!engine) {
84 return false;
85 }
86 return std::all_of(keys.begin(), keys.end(),
87 [this](const auto& pair) {
88 const auto [cbuf, offset] = pair.first;
89 const auto value = pair.second;
90 return value == engine->AccessConstBuffer32(stage, cbuf, offset);
91 }) &&
92 std::all_of(bound_samplers.begin(), bound_samplers.end(),
93 [this](const auto& sampler) {
94 const auto [key, value] = sampler;
95 return value == engine->AccessBoundSampler(stage, key);
96 }) &&
97 std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
98 [this](const auto& sampler) {
99 const auto [cbuf, offset] = sampler.first;
100 const auto value = sampler.second;
101 return value == engine->AccessBindlessSampler(stage, cbuf, offset);
102 });
103}
104
105bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
106 return keys == rhs.keys && bound_samplers == rhs.bound_samplers &&
107 bindless_samplers == rhs.bindless_samplers;
108}
109
110} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
new file mode 100644
index 000000000..600e2f3c3
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -0,0 +1,80 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <unordered_map>
8#include "common/common_types.h"
9#include "common/hash.h"
10#include "video_core/engines/const_buffer_engine_interface.h"
11
12namespace VideoCommon::Shader {
13
14using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
15using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
16using BindlessSamplerMap =
17 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
18
19/**
20 * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader
21 * compiler. With it, the shader can obtain required data from GPU state and store it for disk
22 * shader compilation.
23 **/
24class ConstBufferLocker {
25public:
26 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
27
28 explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
29 Tegra::Engines::ConstBufferEngineInterface& engine);
30
31 ~ConstBufferLocker();
32
33 /// Retrieves a key from the locker. If it's registered, the registered value is returned;
34 /// otherwise it is read from the engine and registered (std::nullopt without an engine).
35 std::optional<u32> ObtainKey(u32 buffer, u32 offset);
36
37 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
38
39 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
40
41 /// Inserts a key.
42 void InsertKey(u32 buffer, u32 offset, u32 value);
43
44 /// Inserts a bound sampler key.
45 void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
46
47 /// Inserts a bindless sampler key.
48 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
49
50 /// Checks keys and samplers against the engine's current const buffers. Returns true if they
51 /// all hold the same values, false otherwise (including when no engine is attached).
52 bool IsConsistent() const;
53
54 /// Returns true if the keys are equal to the other ones in the locker.
55 bool HasEqualKeys(const ConstBufferLocker& rhs) const;
56
57 /// Gives a getter to the const buffer keys in the database.
58 const KeyMap& GetKeys() const {
59 return keys;
60 }
61
62 /// Gets the bound samplers database.
63 const BoundSamplerMap& GetBoundSamplers() const {
64 return bound_samplers;
65 }
66
67 /// Gets bindless samplers database.
68 const BindlessSamplerMap& GetBindlessSamplers() const {
69 return bindless_samplers;
70 }
71
72private:
73 const Tegra::Engines::ShaderType stage;
74 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
75 KeyMap keys;
76 BoundSamplerMap bound_samplers;
77 BindlessSamplerMap bindless_samplers;
78};
79
80} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 268d1aed0..d47c63d9f 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -35,14 +35,20 @@ struct BlockStack {
35 std::stack<u32> pbk_stack{}; 35 std::stack<u32> pbk_stack{};
36}; 36};
37 37
38struct BlockBranchInfo { 38template <typename T, typename... Args>
39 Condition condition{}; 39BlockBranchInfo MakeBranchInfo(Args&&... args) {
40 s32 address{exit_branch}; 40 static_assert(std::is_convertible_v<T, BranchData>);
41 bool kill{}; 41 return std::make_shared<BranchData>(T(std::forward<Args>(args)...));
42 bool is_sync{}; 42}
43 bool is_brk{}; 43
44 bool ignore{}; 44bool BlockBranchIsIgnored(BlockBranchInfo first) {
45}; 45 bool ignore = false;
46 if (std::holds_alternative<SingleBranch>(*first)) {
47 const auto branch = std::get_if<SingleBranch>(first.get());
48 ignore = branch->ignore;
49 }
50 return ignore;
51}
46 52
47struct BlockInfo { 53struct BlockInfo {
48 u32 start{}; 54 u32 start{};
@@ -56,10 +62,11 @@ struct BlockInfo {
56}; 62};
57 63
58struct CFGRebuildState { 64struct CFGRebuildState {
59 explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, 65 explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
60 const u32 start) 66 : program_code{program_code}, start{start}, locker{locker} {}
61 : start{start}, program_code{program_code}, program_size{program_size} {}
62 67
68 const ProgramCode& program_code;
69 ConstBufferLocker& locker;
63 u32 start{}; 70 u32 start{};
64 std::vector<BlockInfo> block_info{}; 71 std::vector<BlockInfo> block_info{};
65 std::list<u32> inspect_queries{}; 72 std::list<u32> inspect_queries{};
@@ -69,8 +76,6 @@ struct CFGRebuildState {
69 std::map<u32, u32> ssy_labels{}; 76 std::map<u32, u32> ssy_labels{};
70 std::map<u32, u32> pbk_labels{}; 77 std::map<u32, u32> pbk_labels{};
71 std::unordered_map<u32, BlockStack> stacks{}; 78 std::unordered_map<u32, BlockStack> stacks{};
72 const ProgramCode& program_code;
73 const std::size_t program_size;
74 ASTManager* manager; 79 ASTManager* manager;
75}; 80};
76 81
@@ -124,10 +129,116 @@ enum class ParseResult : u32 {
124 AbnormalFlow, 129 AbnormalFlow,
125}; 130};
126 131
132struct BranchIndirectInfo {
133 u32 buffer{};
134 u32 offset{};
135 u32 entries{};
136 s32 relative_position{};
137};
138
139std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state,
140 u32 start_address, u32 current_position) {
141 const u32 shader_start = state.start;
142 u32 pos = current_position;
143 BranchIndirectInfo result{};
144 u64 track_register = 0;
145
146 // Step 0 Get BRX Info
147 const Instruction instr = {state.program_code[pos]};
148 const auto opcode = OpCode::Decode(instr);
149 if (opcode->get().GetId() != OpCode::Id::BRX) {
150 return std::nullopt;
151 }
152 if (instr.brx.constant_buffer != 0) {
153 return std::nullopt;
154 }
155 track_register = instr.gpr8.Value();
156 result.relative_position = instr.brx.GetBranchExtend();
157 pos--;
158 bool found_track = false;
159
160 // Step 1 Track LDC
161 while (pos >= shader_start) {
162 if (IsSchedInstruction(pos, shader_start)) {
163 pos--;
164 continue;
165 }
166 const Instruction instr = {state.program_code[pos]};
167 const auto opcode = OpCode::Decode(instr);
168 if (opcode->get().GetId() == OpCode::Id::LD_C) {
169 if (instr.gpr0.Value() == track_register &&
170 instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) {
171 result.buffer = instr.cbuf36.index.Value();
172 result.offset = static_cast<u32>(instr.cbuf36.GetOffset());
173 track_register = instr.gpr8.Value();
174 pos--;
175 found_track = true;
176 break;
177 }
178 }
179 pos--;
180 }
181
182 if (!found_track) {
183 return std::nullopt;
184 }
185 found_track = false;
186
187 // Step 2 Track SHL
188 while (pos >= shader_start) {
189 if (IsSchedInstruction(pos, shader_start)) {
190 pos--;
191 continue;
192 }
193 const Instruction instr = state.program_code[pos];
194 const auto opcode = OpCode::Decode(instr);
195 if (opcode->get().GetId() == OpCode::Id::SHL_IMM) {
196 if (instr.gpr0.Value() == track_register) {
197 track_register = instr.gpr8.Value();
198 pos--;
199 found_track = true;
200 break;
201 }
202 }
203 pos--;
204 }
205
206 if (!found_track) {
207 return std::nullopt;
208 }
209 found_track = false;
210
211 // Step 3 Track IMNMX
212 while (pos >= shader_start) {
213 if (IsSchedInstruction(pos, shader_start)) {
214 pos--;
215 continue;
216 }
217 const Instruction instr = state.program_code[pos];
218 const auto opcode = OpCode::Decode(instr);
219 if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
220 if (instr.gpr0.Value() == track_register) {
221 track_register = instr.gpr8.Value();
222 result.entries = instr.alu.GetSignedImm20_20() + 1;
223 pos--;
224 found_track = true;
225 break;
226 }
227 }
228 pos--;
229 }
230
231 if (!found_track) {
232 return std::nullopt;
233 }
234 return result;
235}
236
127std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { 237std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
128 u32 offset = static_cast<u32>(address); 238 u32 offset = static_cast<u32>(address);
129 const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); 239 const u32 end_address = static_cast<u32>(state.program_code.size());
130 ParseInfo parse_info{}; 240 ParseInfo parse_info{};
241 SingleBranch single_branch{};
131 242
132 const auto insert_label = [](CFGRebuildState& state, u32 address) { 243 const auto insert_label = [](CFGRebuildState& state, u32 address) {
133 const auto pair = state.labels.emplace(address); 244 const auto pair = state.labels.emplace(address);
@@ -140,13 +251,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
140 if (offset >= end_address) { 251 if (offset >= end_address) {
141 // ASSERT_OR_EXECUTE can't be used, as it ignores the break 252 // ASSERT_OR_EXECUTE can't be used, as it ignores the break
142 ASSERT_MSG(false, "Shader passed the current limit!"); 253 ASSERT_MSG(false, "Shader passed the current limit!");
143 parse_info.branch_info.address = exit_branch; 254
144 parse_info.branch_info.ignore = false; 255 single_branch.address = exit_branch;
256 single_branch.ignore = false;
145 break; 257 break;
146 } 258 }
147 if (state.registered.count(offset) != 0) { 259 if (state.registered.count(offset) != 0) {
148 parse_info.branch_info.address = offset; 260 single_branch.address = offset;
149 parse_info.branch_info.ignore = true; 261 single_branch.ignore = true;
150 break; 262 break;
151 } 263 }
152 if (IsSchedInstruction(offset, state.start)) { 264 if (IsSchedInstruction(offset, state.start)) {
@@ -163,24 +275,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
163 switch (opcode->get().GetId()) { 275 switch (opcode->get().GetId()) {
164 case OpCode::Id::EXIT: { 276 case OpCode::Id::EXIT: {
165 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 277 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
166 parse_info.branch_info.condition.predicate = 278 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
167 GetPredicate(pred_index, instr.negate_pred != 0); 279 if (single_branch.condition.predicate == Pred::NeverExecute) {
168 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
169 offset++; 280 offset++;
170 continue; 281 continue;
171 } 282 }
172 const ConditionCode cc = instr.flow_condition_code; 283 const ConditionCode cc = instr.flow_condition_code;
173 parse_info.branch_info.condition.cc = cc; 284 single_branch.condition.cc = cc;
174 if (cc == ConditionCode::F) { 285 if (cc == ConditionCode::F) {
175 offset++; 286 offset++;
176 continue; 287 continue;
177 } 288 }
178 parse_info.branch_info.address = exit_branch; 289 single_branch.address = exit_branch;
179 parse_info.branch_info.kill = false; 290 single_branch.kill = false;
180 parse_info.branch_info.is_sync = false; 291 single_branch.is_sync = false;
181 parse_info.branch_info.is_brk = false; 292 single_branch.is_brk = false;
182 parse_info.branch_info.ignore = false; 293 single_branch.ignore = false;
183 parse_info.end_address = offset; 294 parse_info.end_address = offset;
295 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
296 single_branch.condition, single_branch.address, single_branch.kill,
297 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
184 298
185 return {ParseResult::ControlCaught, parse_info}; 299 return {ParseResult::ControlCaught, parse_info};
186 } 300 }
@@ -189,99 +303,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
189 return {ParseResult::AbnormalFlow, parse_info}; 303 return {ParseResult::AbnormalFlow, parse_info};
190 } 304 }
191 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 305 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
192 parse_info.branch_info.condition.predicate = 306 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
193 GetPredicate(pred_index, instr.negate_pred != 0); 307 if (single_branch.condition.predicate == Pred::NeverExecute) {
194 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
195 offset++; 308 offset++;
196 continue; 309 continue;
197 } 310 }
198 const ConditionCode cc = instr.flow_condition_code; 311 const ConditionCode cc = instr.flow_condition_code;
199 parse_info.branch_info.condition.cc = cc; 312 single_branch.condition.cc = cc;
200 if (cc == ConditionCode::F) { 313 if (cc == ConditionCode::F) {
201 offset++; 314 offset++;
202 continue; 315 continue;
203 } 316 }
204 const u32 branch_offset = offset + instr.bra.GetBranchTarget(); 317 const u32 branch_offset = offset + instr.bra.GetBranchTarget();
205 if (branch_offset == 0) { 318 if (branch_offset == 0) {
206 parse_info.branch_info.address = exit_branch; 319 single_branch.address = exit_branch;
207 } else { 320 } else {
208 parse_info.branch_info.address = branch_offset; 321 single_branch.address = branch_offset;
209 } 322 }
210 insert_label(state, branch_offset); 323 insert_label(state, branch_offset);
211 parse_info.branch_info.kill = false; 324 single_branch.kill = false;
212 parse_info.branch_info.is_sync = false; 325 single_branch.is_sync = false;
213 parse_info.branch_info.is_brk = false; 326 single_branch.is_brk = false;
214 parse_info.branch_info.ignore = false; 327 single_branch.ignore = false;
215 parse_info.end_address = offset; 328 parse_info.end_address = offset;
329 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
330 single_branch.condition, single_branch.address, single_branch.kill,
331 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
216 332
217 return {ParseResult::ControlCaught, parse_info}; 333 return {ParseResult::ControlCaught, parse_info};
218 } 334 }
219 case OpCode::Id::SYNC: { 335 case OpCode::Id::SYNC: {
220 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 336 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
221 parse_info.branch_info.condition.predicate = 337 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
222 GetPredicate(pred_index, instr.negate_pred != 0); 338 if (single_branch.condition.predicate == Pred::NeverExecute) {
223 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
224 offset++; 339 offset++;
225 continue; 340 continue;
226 } 341 }
227 const ConditionCode cc = instr.flow_condition_code; 342 const ConditionCode cc = instr.flow_condition_code;
228 parse_info.branch_info.condition.cc = cc; 343 single_branch.condition.cc = cc;
229 if (cc == ConditionCode::F) { 344 if (cc == ConditionCode::F) {
230 offset++; 345 offset++;
231 continue; 346 continue;
232 } 347 }
233 parse_info.branch_info.address = unassigned_branch; 348 single_branch.address = unassigned_branch;
234 parse_info.branch_info.kill = false; 349 single_branch.kill = false;
235 parse_info.branch_info.is_sync = true; 350 single_branch.is_sync = true;
236 parse_info.branch_info.is_brk = false; 351 single_branch.is_brk = false;
237 parse_info.branch_info.ignore = false; 352 single_branch.ignore = false;
238 parse_info.end_address = offset; 353 parse_info.end_address = offset;
354 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
355 single_branch.condition, single_branch.address, single_branch.kill,
356 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
239 357
240 return {ParseResult::ControlCaught, parse_info}; 358 return {ParseResult::ControlCaught, parse_info};
241 } 359 }
242 case OpCode::Id::BRK: { 360 case OpCode::Id::BRK: {
243 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 361 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
244 parse_info.branch_info.condition.predicate = 362 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
245 GetPredicate(pred_index, instr.negate_pred != 0); 363 if (single_branch.condition.predicate == Pred::NeverExecute) {
246 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
247 offset++; 364 offset++;
248 continue; 365 continue;
249 } 366 }
250 const ConditionCode cc = instr.flow_condition_code; 367 const ConditionCode cc = instr.flow_condition_code;
251 parse_info.branch_info.condition.cc = cc; 368 single_branch.condition.cc = cc;
252 if (cc == ConditionCode::F) { 369 if (cc == ConditionCode::F) {
253 offset++; 370 offset++;
254 continue; 371 continue;
255 } 372 }
256 parse_info.branch_info.address = unassigned_branch; 373 single_branch.address = unassigned_branch;
257 parse_info.branch_info.kill = false; 374 single_branch.kill = false;
258 parse_info.branch_info.is_sync = false; 375 single_branch.is_sync = false;
259 parse_info.branch_info.is_brk = true; 376 single_branch.is_brk = true;
260 parse_info.branch_info.ignore = false; 377 single_branch.ignore = false;
261 parse_info.end_address = offset; 378 parse_info.end_address = offset;
379 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
380 single_branch.condition, single_branch.address, single_branch.kill,
381 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
262 382
263 return {ParseResult::ControlCaught, parse_info}; 383 return {ParseResult::ControlCaught, parse_info};
264 } 384 }
265 case OpCode::Id::KIL: { 385 case OpCode::Id::KIL: {
266 const auto pred_index = static_cast<u32>(instr.pred.pred_index); 386 const auto pred_index = static_cast<u32>(instr.pred.pred_index);
267 parse_info.branch_info.condition.predicate = 387 single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
268 GetPredicate(pred_index, instr.negate_pred != 0); 388 if (single_branch.condition.predicate == Pred::NeverExecute) {
269 if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
270 offset++; 389 offset++;
271 continue; 390 continue;
272 } 391 }
273 const ConditionCode cc = instr.flow_condition_code; 392 const ConditionCode cc = instr.flow_condition_code;
274 parse_info.branch_info.condition.cc = cc; 393 single_branch.condition.cc = cc;
275 if (cc == ConditionCode::F) { 394 if (cc == ConditionCode::F) {
276 offset++; 395 offset++;
277 continue; 396 continue;
278 } 397 }
279 parse_info.branch_info.address = exit_branch; 398 single_branch.address = exit_branch;
280 parse_info.branch_info.kill = true; 399 single_branch.kill = true;
281 parse_info.branch_info.is_sync = false; 400 single_branch.is_sync = false;
282 parse_info.branch_info.is_brk = false; 401 single_branch.is_brk = false;
283 parse_info.branch_info.ignore = false; 402 single_branch.ignore = false;
284 parse_info.end_address = offset; 403 parse_info.end_address = offset;
404 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
405 single_branch.condition, single_branch.address, single_branch.kill,
406 single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
285 407
286 return {ParseResult::ControlCaught, parse_info}; 408 return {ParseResult::ControlCaught, parse_info};
287 } 409 }
@@ -298,6 +420,29 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
298 break; 420 break;
299 } 421 }
300 case OpCode::Id::BRX: { 422 case OpCode::Id::BRX: {
423 auto tmp = TrackBranchIndirectInfo(state, address, offset);
424 if (tmp) {
425 auto result = *tmp;
426 std::vector<CaseBranch> branches{};
427 s32 pc_target = offset + result.relative_position;
428 for (u32 i = 0; i < result.entries; i++) {
429 auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
430 if (!k) {
431 return {ParseResult::AbnormalFlow, parse_info};
432 }
433 u32 value = *k;
434 u32 target = static_cast<u32>((value >> 3) + pc_target);
435 insert_label(state, target);
436 branches.emplace_back(value, target);
437 }
438 parse_info.end_address = offset;
439 parse_info.branch_info = MakeBranchInfo<MultiBranch>(
440 static_cast<u32>(instr.gpr8.Value()), std::move(branches));
441
442 return {ParseResult::ControlCaught, parse_info};
443 } else {
444 LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
445 }
301 return {ParseResult::AbnormalFlow, parse_info}; 446 return {ParseResult::AbnormalFlow, parse_info};
302 } 447 }
303 default: 448 default:
@@ -306,10 +451,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
306 451
307 offset++; 452 offset++;
308 } 453 }
309 parse_info.branch_info.kill = false; 454 single_branch.kill = false;
310 parse_info.branch_info.is_sync = false; 455 single_branch.is_sync = false;
311 parse_info.branch_info.is_brk = false; 456 single_branch.is_brk = false;
312 parse_info.end_address = offset - 1; 457 parse_info.end_address = offset - 1;
458 parse_info.branch_info = MakeBranchInfo<SingleBranch>(
459 single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
460 single_branch.is_brk, single_branch.ignore);
313 return {ParseResult::BlockEnd, parse_info}; 461 return {ParseResult::BlockEnd, parse_info};
314} 462}
315 463
@@ -333,9 +481,10 @@ bool TryInspectAddress(CFGRebuildState& state) {
333 BlockInfo& current_block = state.block_info[block_index]; 481 BlockInfo& current_block = state.block_info[block_index];
334 current_block.end = address - 1; 482 current_block.end = address - 1;
335 new_block.branch = current_block.branch; 483 new_block.branch = current_block.branch;
336 BlockBranchInfo forward_branch{}; 484 BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
337 forward_branch.address = address; 485 const auto branch = std::get_if<SingleBranch>(forward_branch.get());
338 forward_branch.ignore = true; 486 branch->address = address;
487 branch->ignore = true;
339 current_block.branch = forward_branch; 488 current_block.branch = forward_branch;
340 return true; 489 return true;
341 } 490 }
@@ -350,12 +499,15 @@ bool TryInspectAddress(CFGRebuildState& state) {
350 499
351 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); 500 BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
352 block_info.branch = parse_info.branch_info; 501 block_info.branch = parse_info.branch_info;
353 if (parse_info.branch_info.condition.IsUnconditional()) { 502 if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
503 const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
504 if (branch->condition.IsUnconditional()) {
505 return true;
506 }
507 const u32 fallthrough_address = parse_info.end_address + 1;
508 state.inspect_queries.push_front(fallthrough_address);
354 return true; 509 return true;
355 } 510 }
356
357 const u32 fallthrough_address = parse_info.end_address + 1;
358 state.inspect_queries.push_front(fallthrough_address);
359 return true; 511 return true;
360} 512}
361 513
@@ -393,31 +545,42 @@ bool TryQuery(CFGRebuildState& state) {
393 state.queries.pop_front(); 545 state.queries.pop_front();
394 gather_labels(q2.ssy_stack, state.ssy_labels, block); 546 gather_labels(q2.ssy_stack, state.ssy_labels, block);
395 gather_labels(q2.pbk_stack, state.pbk_labels, block); 547 gather_labels(q2.pbk_stack, state.pbk_labels, block);
396 if (!block.branch.condition.IsUnconditional()) { 548 if (std::holds_alternative<SingleBranch>(*block.branch)) {
397 q2.address = block.end + 1; 549 const auto branch = std::get_if<SingleBranch>(block.branch.get());
398 state.queries.push_back(q2); 550 if (!branch->condition.IsUnconditional()) {
399 } 551 q2.address = block.end + 1;
552 state.queries.push_back(q2);
553 }
400 554
401 Query conditional_query{q2}; 555 Query conditional_query{q2};
402 if (block.branch.is_sync) { 556 if (branch->is_sync) {
403 if (block.branch.address == unassigned_branch) { 557 if (branch->address == unassigned_branch) {
404 block.branch.address = conditional_query.ssy_stack.top(); 558 branch->address = conditional_query.ssy_stack.top();
559 }
560 conditional_query.ssy_stack.pop();
405 } 561 }
406 conditional_query.ssy_stack.pop(); 562 if (branch->is_brk) {
407 } 563 if (branch->address == unassigned_branch) {
408 if (block.branch.is_brk) { 564 branch->address = conditional_query.pbk_stack.top();
409 if (block.branch.address == unassigned_branch) { 565 }
410 block.branch.address = conditional_query.pbk_stack.top(); 566 conditional_query.pbk_stack.pop();
411 } 567 }
412 conditional_query.pbk_stack.pop(); 568 conditional_query.address = branch->address;
569 state.queries.push_back(std::move(conditional_query));
570 return true;
571 }
572 const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
573 for (const auto& branch_case : multi_branch->branches) {
574 Query conditional_query{q2};
575 conditional_query.address = branch_case.address;
576 state.queries.push_back(std::move(conditional_query));
413 } 577 }
414 conditional_query.address = block.branch.address;
415 state.queries.push_back(std::move(conditional_query));
416 return true; 578 return true;
417} 579}
580
418} // Anonymous namespace 581} // Anonymous namespace
419 582
420void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { 583void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
421 const auto get_expr = ([&](const Condition& cond) -> Expr { 584 const auto get_expr = ([&](const Condition& cond) -> Expr {
422 Expr result{}; 585 Expr result{};
423 if (cond.cc != ConditionCode::T) { 586 if (cond.cc != ConditionCode::T) {
@@ -444,15 +607,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) {
444 } 607 }
445 return MakeExpr<ExprBoolean>(true); 608 return MakeExpr<ExprBoolean>(true);
446 }); 609 });
447 if (branch.address < 0) { 610 if (std::holds_alternative<SingleBranch>(*branch_info)) {
448 if (branch.kill) { 611 const auto branch = std::get_if<SingleBranch>(branch_info.get());
449 mm.InsertReturn(get_expr(branch.condition), true); 612 if (branch->address < 0) {
613 if (branch->kill) {
614 mm.InsertReturn(get_expr(branch->condition), true);
615 return;
616 }
617 mm.InsertReturn(get_expr(branch->condition), false);
450 return; 618 return;
451 } 619 }
452 mm.InsertReturn(get_expr(branch.condition), false); 620 mm.InsertGoto(get_expr(branch->condition), branch->address);
453 return; 621 return;
454 } 622 }
455 mm.InsertGoto(get_expr(branch.condition), branch.address); 623 const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
624 for (const auto& branch_case : multi_branch->branches) {
625 mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
626 branch_case.address);
627 }
456} 628}
457 629
458void DecompileShader(CFGRebuildState& state) { 630void DecompileShader(CFGRebuildState& state) {
@@ -464,25 +636,26 @@ void DecompileShader(CFGRebuildState& state) {
464 if (state.labels.count(block.start) != 0) { 636 if (state.labels.count(block.start) != 0) {
465 state.manager->InsertLabel(block.start); 637 state.manager->InsertLabel(block.start);
466 } 638 }
467 u32 end = block.branch.ignore ? block.end + 1 : block.end; 639 const bool ignore = BlockBranchIsIgnored(block.branch);
640 u32 end = ignore ? block.end + 1 : block.end;
468 state.manager->InsertBlock(block.start, end); 641 state.manager->InsertBlock(block.start, end);
469 if (!block.branch.ignore) { 642 if (!ignore) {
470 InsertBranch(*state.manager, block.branch); 643 InsertBranch(*state.manager, block.branch);
471 } 644 }
472 } 645 }
473 state.manager->Decompile(); 646 state.manager->Decompile();
474} 647}
475 648
476std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, 649std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
477 u32 start_address, 650 const CompilerSettings& settings,
478 const CompilerSettings& settings) { 651 ConstBufferLocker& locker) {
479 auto result_out = std::make_unique<ShaderCharacteristics>(); 652 auto result_out = std::make_unique<ShaderCharacteristics>();
480 if (settings.depth == CompileDepth::BruteForce) { 653 if (settings.depth == CompileDepth::BruteForce) {
481 result_out->settings.depth = CompileDepth::BruteForce; 654 result_out->settings.depth = CompileDepth::BruteForce;
482 return result_out; 655 return result_out;
483 } 656 }
484 657
485 CFGRebuildState state{program_code, program_size, start_address}; 658 CFGRebuildState state{program_code, start_address, locker};
486 // Inspect Code and generate blocks 659 // Inspect Code and generate blocks
487 state.labels.clear(); 660 state.labels.clear();
488 state.labels.emplace(start_address); 661 state.labels.emplace(start_address);
@@ -547,11 +720,9 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
547 ShaderBlock new_block{}; 720 ShaderBlock new_block{};
548 new_block.start = block.start; 721 new_block.start = block.start;
549 new_block.end = block.end; 722 new_block.end = block.end;
550 new_block.ignore_branch = block.branch.ignore; 723 new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
551 if (!new_block.ignore_branch) { 724 if (!new_block.ignore_branch) {
552 new_block.branch.cond = block.branch.condition; 725 new_block.branch = block.branch;
553 new_block.branch.kills = block.branch.kill;
554 new_block.branch.address = block.branch.address;
555 } 726 }
556 result_out->end = std::max(result_out->end, block.end); 727 result_out->end = std::max(result_out->end, block.end);
557 result_out->blocks.push_back(new_block); 728 result_out->blocks.push_back(new_block);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 74e54a5c7..5304998b9 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -7,6 +7,7 @@
7#include <list> 7#include <list>
8#include <optional> 8#include <optional>
9#include <set> 9#include <set>
10#include <variant>
10 11
11#include "video_core/engines/shader_bytecode.h" 12#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/ast.h" 13#include "video_core/shader/ast.h"
@@ -37,29 +38,61 @@ struct Condition {
37 } 38 }
38}; 39};
39 40
40struct ShaderBlock { 41class SingleBranch {
41 struct Branch { 42public:
42 Condition cond{}; 43 SingleBranch() = default;
43 bool kills{}; 44 SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
44 s32 address{}; 45 bool ignore)
46 : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
47 ignore{ignore} {}
48
49 bool operator==(const SingleBranch& b) const {
50 return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
51 std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
52 }
53
54 bool operator!=(const SingleBranch& b) const {
55 return !operator==(b);
56 }
57
58 Condition condition{};
59 s32 address{exit_branch};
60 bool kill{};
61 bool is_sync{};
62 bool is_brk{};
63 bool ignore{};
64};
45 65
46 bool operator==(const Branch& b) const { 66struct CaseBranch {
47 return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); 67 CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
48 } 68 u32 cmp_value;
69 u32 address;
70};
71
72class MultiBranch {
73public:
74 MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
75 : gpr{gpr}, branches{std::move(branches)} {}
76
77 u32 gpr{};
78 std::vector<CaseBranch> branches{};
79};
80
81using BranchData = std::variant<SingleBranch, MultiBranch>;
82using BlockBranchInfo = std::shared_ptr<BranchData>;
49 83
50 bool operator!=(const Branch& b) const { 84bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
51 return !operator==(b);
52 }
53 };
54 85
86struct ShaderBlock {
55 u32 start{}; 87 u32 start{};
56 u32 end{}; 88 u32 end{};
57 bool ignore_branch{}; 89 bool ignore_branch{};
58 Branch branch{}; 90 BlockBranchInfo branch{};
59 91
60 bool operator==(const ShaderBlock& sb) const { 92 bool operator==(const ShaderBlock& sb) const {
61 return std::tie(start, end, ignore_branch, branch) == 93 return std::tie(start, end, ignore_branch) ==
62 std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); 94 std::tie(sb.start, sb.end, sb.ignore_branch) &&
95 BlockBranchInfoAreEqual(branch, sb.branch);
63 } 96 }
64 97
65 bool operator!=(const ShaderBlock& sb) const { 98 bool operator!=(const ShaderBlock& sb) const {
@@ -76,8 +109,8 @@ struct ShaderCharacteristics {
76 CompilerSettings settings{}; 109 CompilerSettings settings{};
77}; 110};
78 111
79std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, 112std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
80 u32 start_address, 113 const CompilerSettings& settings,
81 const CompilerSettings& settings); 114 ConstBufferLocker& locker);
82 115
83} // namespace VideoCommon::Shader 116} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2626b1616..21fb9cb83 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
33 return (absolute_offset % SchedPeriod) == 0; 33 return (absolute_offset % SchedPeriod) == 0;
34} 34}
35 35
36} // namespace 36} // Anonymous namespace
37 37
38class ASTDecoder { 38class ASTDecoder {
39public: 39public:
@@ -102,7 +102,7 @@ void ShaderIR::Decode() {
102 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); 102 std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
103 103
104 decompiled = false; 104 decompiled = false;
105 auto info = ScanFlow(program_code, program_size, main_offset, settings); 105 auto info = ScanFlow(program_code, main_offset, settings, locker);
106 auto& shader_info = *info; 106 auto& shader_info = *info;
107 coverage_begin = shader_info.start; 107 coverage_begin = shader_info.start;
108 coverage_end = shader_info.end; 108 coverage_end = shader_info.end;
@@ -155,7 +155,7 @@ void ShaderIR::Decode() {
155 [[fallthrough]]; 155 [[fallthrough]];
156 case CompileDepth::BruteForce: { 156 case CompileDepth::BruteForce: {
157 coverage_begin = main_offset; 157 coverage_begin = main_offset;
158 const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); 158 const std::size_t shader_end = program_code.size();
159 coverage_end = shader_end; 159 coverage_end = shader_end;
160 for (u32 label = main_offset; label < shader_end; label++) { 160 for (u32 label = main_offset; label < shader_end; label++) {
161 basic_blocks.insert({label, DecodeRange(label, label + 1)}); 161 basic_blocks.insert({label, DecodeRange(label, label + 1)});
@@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
198 } 198 }
199 return result; 199 return result;
200 }; 200 };
201 if (block.branch.address < 0) { 201 if (std::holds_alternative<SingleBranch>(*block.branch)) {
202 if (block.branch.kills) { 202 auto branch = std::get_if<SingleBranch>(block.branch.get());
203 Node n = Operation(OperationCode::Discard); 203 if (branch->address < 0) {
204 n = apply_conditions(block.branch.cond, n); 204 if (branch->kill) {
205 Node n = Operation(OperationCode::Discard);
206 n = apply_conditions(branch->condition, n);
207 bb.push_back(n);
208 global_code.push_back(n);
209 return;
210 }
211 Node n = Operation(OperationCode::Exit);
212 n = apply_conditions(branch->condition, n);
205 bb.push_back(n); 213 bb.push_back(n);
206 global_code.push_back(n); 214 global_code.push_back(n);
207 return; 215 return;
208 } 216 }
209 Node n = Operation(OperationCode::Exit); 217 Node n = Operation(OperationCode::Branch, Immediate(branch->address));
210 n = apply_conditions(block.branch.cond, n); 218 n = apply_conditions(branch->condition, n);
211 bb.push_back(n); 219 bb.push_back(n);
212 global_code.push_back(n); 220 global_code.push_back(n);
213 return; 221 return;
214 } 222 }
215 Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); 223 auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
216 n = apply_conditions(block.branch.cond, n); 224 Node op_a = GetRegister(multi_branch->gpr);
217 bb.push_back(n); 225 for (auto& branch_case : multi_branch->branches) {
218 global_code.push_back(n); 226 Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
227 Node op_b = Immediate(branch_case.cmp_value);
228 Node condition =
229 GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
230 auto result = Conditional(condition, {n});
231 bb.push_back(result);
232 global_code.push_back(result);
233 }
219} 234}
220 235
221u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { 236u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index b73f6536e..a33d242e9 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
144 case OpCode::Id::ICMP_IMM: { 144 case OpCode::Id::ICMP_IMM: {
145 const Node zero = Immediate(0); 145 const Node zero = Immediate(0);
146 146
147 const auto [op_b, test] = [&]() -> std::pair<Node, Node> { 147 const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
148 switch (opcode->get().GetId()) { 148 switch (opcode->get().GetId()) {
149 case OpCode::Id::ICMP_CR: 149 case OpCode::Id::ICMP_CR:
150 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), 150 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
@@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
161 return {zero, zero}; 161 return {zero, zero};
162 } 162 }
163 }(); 163 }();
164 const Node op_a = GetRegister(instr.gpr8); 164 const Node op_lhs = GetRegister(instr.gpr8);
165 const Node comparison = 165 const Node comparison =
166 GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); 166 GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
167 SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b)); 167 SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
168 break; 168 break;
169 } 169 }
170 case OpCode::Id::LOP_C: 170 case OpCode::Id::LOP_C:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 95ec1cdd9..b02d2cb95 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -144,8 +144,8 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
144 144
145Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { 145Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
146 const auto offset{static_cast<std::size_t>(image.index.Value())}; 146 const auto offset{static_cast<std::size_t>(image.index.Value())};
147 if (const auto image = TryUseExistingImage(offset, type)) { 147 if (const auto existing_image = TryUseExistingImage(offset, type)) {
148 return *image; 148 return *existing_image;
149 } 149 }
150 150
151 const std::size_t next_index{used_images.size()}; 151 const std::size_t next_index{used_images.size()};
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7923d4d69..335d78146 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
166 }(); 166 }();
167 167
168 const auto [real_address_base, base_address, descriptor] = 168 const auto [real_address_base, base_address, descriptor] =
169 TrackAndGetGlobalMemory(bb, instr, false); 169 TrackGlobalMemory(bb, instr, false);
170 170
171 const u32 count = GetUniformTypeElementsCount(type); 171 const u32 count = GetUniformTypeElementsCount(type);
172 if (!real_address_base || !base_address) {
173 // Tracking failed, load zeroes.
174 for (u32 i = 0; i < count; ++i) {
175 SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
176 }
177 break;
178 }
179
172 for (u32 i = 0; i < count; ++i) { 180 for (u32 i = 0; i < count; ++i) {
173 const Node it_offset = Immediate(i * 4); 181 const Node it_offset = Immediate(i * 4);
174 const Node real_address = 182 const Node real_address =
@@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
260 }(); 268 }();
261 269
262 const auto [real_address_base, base_address, descriptor] = 270 const auto [real_address_base, base_address, descriptor] =
263 TrackAndGetGlobalMemory(bb, instr, true); 271 TrackGlobalMemory(bb, instr, true);
264 272 if (!real_address_base || !base_address) {
265 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} 273 // Tracking failed, skip the store.
266 SetTemporary(bb, 0, real_address_base); 274 break;
275 }
267 276
268 const u32 count = GetUniformTypeElementsCount(type); 277 const u32 count = GetUniformTypeElementsCount(type);
269 for (u32 i = 0; i < count; ++i) { 278 for (u32 i = 0; i < count; ++i) {
270 SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
271 }
272 for (u32 i = 0; i < count; ++i) {
273 const Node it_offset = Immediate(i * 4); 279 const Node it_offset = Immediate(i * 4);
274 const Node real_address = 280 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
275 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
276 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 281 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
277 282 const Node value = GetRegister(instr.gpr0.Value() + i);
278 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); 283 bb.push_back(Operation(OperationCode::Assign, gmem, value));
279 } 284 }
280 break; 285 break;
281 } 286 }
@@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
301 return pc; 306 return pc;
302} 307}
303 308
304std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, 309std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
305 Instruction instr, 310 Instruction instr,
306 bool is_write) { 311 bool is_write) {
307 const auto addr_register{GetRegister(instr.gmem.gpr)}; 312 const auto addr_register{GetRegister(instr.gmem.gpr)};
308 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 313 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
309 314
310 const auto [base_address, index, offset] = 315 const auto [base_address, index, offset] =
311 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); 316 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
312 ASSERT(base_address != nullptr); 317 ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
318 { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
319 "Global memory tracking failed");
313 320
314 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); 321 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
315 322
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46e0f823..116b95f76 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
67 break; 67 break;
68 } 68 }
69 case OpCode::Id::MOV_SYS: { 69 case OpCode::Id::MOV_SYS: {
70 const Node value = [&]() { 70 const Node value = [this, instr] {
71 switch (instr.sys20) { 71 switch (instr.sys20) {
72 case SystemVariable::Ydirection: 72 case SystemVariable::Ydirection:
73 return Operation(OperationCode::YNegate); 73 return Operation(OperationCode::YNegate);
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index f6ee68a54..d419e9c45 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
18 const auto opcode = OpCode::Decode(instr); 18 const auto opcode = OpCode::Decode(instr);
19 19
20 Node op_a = GetRegister(instr.gpr8); 20 Node op_a = GetRegister(instr.gpr8);
21 Node op_b = [&]() { 21 Node op_b = [this, instr] {
22 if (instr.is_b_imm) { 22 if (instr.is_b_imm) {
23 return Immediate(instr.alu.GetSignedImm20_20()); 23 return Immediate(instr.alu.GetSignedImm20_20());
24 } else if (instr.is_b_gpr) { 24 } else if (instr.is_b_gpr) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b934a069..d61e656b7 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
141 const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); 141 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
142 142
143 const auto& sampler = 143 const auto& sampler =
144 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); 144 GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
145 145
146 Node4 values; 146 Node4 values;
147 for (u32 element = 0; element < values.size(); ++element) { 147 for (u32 element = 0; element < values.size(); ++element) {
@@ -150,7 +150,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
150 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 150 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
151 } 151 }
152 152
153 WriteTexsInstructionFloat(bb, instr, values); 153 WriteTexsInstructionFloat(bb, instr, values, true);
154 break; 154 break;
155 } 155 }
156 case OpCode::Id::TXQ_B: 156 case OpCode::Id::TXQ_B:
@@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
165 // Sadly, not all texture instructions specify the type of texture their sampler 165 // Sadly, not all texture instructions specify the type of texture their sampler
166 // uses. This must be fixed at a later instance. 166 // uses. This must be fixed at a later instance.
167 const auto& sampler = 167 const auto& sampler =
168 is_bindless 168 is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
169 ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
170 false)
171 : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
172 169
173 u32 indexer = 0; 170 u32 indexer = 0;
174 switch (instr.txq.query_type) { 171 switch (instr.txq.query_type) {
@@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
207 204
208 auto texture_type = instr.tmml.texture_type.Value(); 205 auto texture_type = instr.tmml.texture_type.Value();
209 const bool is_array = instr.tmml.array != 0; 206 const bool is_array = instr.tmml.array != 0;
210 const auto& sampler = is_bindless 207 const auto& sampler =
211 ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) 208 is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
212 : GetSampler(instr.sampler, texture_type, is_array, false); 209 : GetSampler(instr.sampler, {{texture_type, is_array, false}});
213 210
214 std::vector<Node> coords; 211 std::vector<Node> coords;
215 212
@@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
285 return pc; 282 return pc;
286} 283}
287 284
288const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, 285const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
289 bool is_array, bool is_shadow) { 286 std::optional<SamplerInfo> sampler_info) {
290 const auto offset = static_cast<std::size_t>(sampler.index.Value()); 287 const auto offset = static_cast<u32>(sampler.index.Value());
288
289 Tegra::Shader::TextureType type;
290 bool is_array;
291 bool is_shadow;
292 if (sampler_info) {
293 type = sampler_info->type;
294 is_array = sampler_info->is_array;
295 is_shadow = sampler_info->is_shadow;
296 } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
297 type = sampler->texture_type.Value();
298 is_array = sampler->is_array.Value() != 0;
299 is_shadow = sampler->is_shadow.Value() != 0;
300 } else {
301 type = Tegra::Shader::TextureType::Texture2D;
302 is_array = false;
303 is_shadow = false;
304 }
291 305
292 // If this sampler has already been used, return the existing mapping. 306 // If this sampler has already been used, return the existing mapping.
293 const auto itr = 307 const auto itr =
@@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
303 const std::size_t next_index = used_samplers.size(); 317 const std::size_t next_index = used_samplers.size();
304 const Sampler entry{offset, next_index, type, is_array, is_shadow}; 318 const Sampler entry{offset, next_index, type, is_array, is_shadow};
305 return *used_samplers.emplace(entry).first; 319 return *used_samplers.emplace(entry).first;
306} 320} // namespace VideoCommon::Shader
307 321
308const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, 322const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
309 bool is_array, bool is_shadow) { 323 std::optional<SamplerInfo> sampler_info) {
310 const Node sampler_register = GetRegister(reg); 324 const Node sampler_register = GetRegister(reg);
311 const auto [base_sampler, cbuf_index, cbuf_offset] = 325 const auto [base_sampler, cbuf_index, cbuf_offset] =
312 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 326 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
313 ASSERT(base_sampler != nullptr); 327 ASSERT(base_sampler != nullptr);
314 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); 328 const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
329 Tegra::Shader::TextureType type;
330 bool is_array;
331 bool is_shadow;
332 if (sampler_info) {
333 type = sampler_info->type;
334 is_array = sampler_info->is_array;
335 is_shadow = sampler_info->is_shadow;
336 } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
337 type = sampler->texture_type.Value();
338 is_array = sampler->is_array.Value() != 0;
339 is_shadow = sampler->is_shadow.Value() != 0;
340 } else {
341 type = Tegra::Shader::TextureType::Texture2D;
342 is_array = false;
343 is_shadow = false;
344 }
315 345
316 // If this sampler has already been used, return the existing mapping. 346 // If this sampler has already been used, return the existing mapping.
317 const auto itr = 347 const auto itr =
@@ -344,14 +374,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
344 } 374 }
345} 375}
346 376
347void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, 377void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
348 const Node4& components) { 378 bool ignore_mask) {
349 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle 379 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
350 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 380 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
351 381
352 u32 dest_elem = 0; 382 u32 dest_elem = 0;
353 for (u32 component = 0; component < 4; ++component) { 383 for (u32 component = 0; component < 4; ++component) {
354 if (!instr.texs.IsComponentEnabled(component)) 384 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
355 continue; 385 continue;
356 SetTemporary(bb, dest_elem++, components[component]); 386 SetTemporary(bb, dest_elem++, components[component]);
357 } 387 }
@@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
411 (texture_type == TextureType::TextureCube && is_array && is_shadow), 441 (texture_type == TextureType::TextureCube && is_array && is_shadow),
412 "This method is not supported."); 442 "This method is not supported.");
413 443
414 const auto& sampler = is_bindless 444 const auto& sampler =
415 ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) 445 is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
416 : GetSampler(instr.sampler, texture_type, is_array, is_shadow); 446 : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
417 447
418 const bool lod_needed = process_mode == TextureProcessMode::LZ || 448 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
419 process_mode == TextureProcessMode::LL || 449 process_mode == TextureProcessMode::LL ||
@@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
577 dc = GetRegister(parameter_register++); 607 dc = GetRegister(parameter_register++);
578 } 608 }
579 609
580 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); 610 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
581 611
582 Node4 values; 612 Node4 values;
583 for (u32 element = 0; element < values.size(); ++element) { 613 for (u32 element = 0; element < values.size(); ++element) {
@@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
610 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; 640 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
611 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; 641 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
612 642
613 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); 643 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
614 644
615 Node4 values; 645 Node4 values;
616 for (u32 element = 0; element < values.size(); ++element) { 646 for (u32 element = 0; element < values.size(); ++element) {
@@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
646 // When lod is used always is in gpr20 676 // When lod is used always is in gpr20
647 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); 677 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
648 678
649 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); 679 const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
650 680
651 Node4 values; 681 Node4 values;
652 for (u32 element = 0; element < values.size(); ++element) { 682 for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 97fc6f9b1..b047cf870 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
23 const Node op_a = 23 const Node op_a =
24 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, 24 GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
25 instr.video.type_a, instr.video.byte_height_a); 25 instr.video.type_a, instr.video.byte_height_a);
26 const Node op_b = [&]() { 26 const Node op_b = [this, instr] {
27 if (instr.video.use_register_b) { 27 if (instr.video.use_register_b) {
28 return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, 28 return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
29 instr.video.signed_b, instr.video.type_b, 29 instr.video.signed_b, instr.video.type_b,
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index a8e481b3c..fa8a250cc 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -46,9 +46,10 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
46 break; 46 break;
47 } 47 }
48 case OpCode::Id::SHFL: { 48 case OpCode::Id::SHFL: {
49 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) 49 Node width = [this, instr] {
50 : GetRegister(instr.gpr39); 50 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
51 Node width = [&] { 51 : GetRegister(instr.gpr39);
52
52 // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has 53 // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
53 // been done reversing Nvidia's math. It won't work on all cases due to SHFL having 54 // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
54 // different parameters that don't properly map to GLSL's interface, but it should work 55 // different parameters that don't properly map to GLSL's interface, but it should work
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
index d3dcd00ec..4e8264367 100644
--- a/src/video_core/shader/expr.h
+++ b/src/video_core/shader/expr.h
@@ -17,13 +17,14 @@ using Tegra::Shader::Pred;
17class ExprAnd; 17class ExprAnd;
18class ExprBoolean; 18class ExprBoolean;
19class ExprCondCode; 19class ExprCondCode;
20class ExprGprEqual;
20class ExprNot; 21class ExprNot;
21class ExprOr; 22class ExprOr;
22class ExprPredicate; 23class ExprPredicate;
23class ExprVar; 24class ExprVar;
24 25
25using ExprData = 26using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
26 std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, ExprBoolean>; 27 ExprBoolean, ExprGprEqual>;
27using Expr = std::shared_ptr<ExprData>; 28using Expr = std::shared_ptr<ExprData>;
28 29
29class ExprAnd final { 30class ExprAnd final {
@@ -118,6 +119,22 @@ public:
118 bool value; 119 bool value;
119}; 120};
120 121
122class ExprGprEqual final {
123public:
124 ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {}
125
126 bool operator==(const ExprGprEqual& b) const {
127 return gpr == b.gpr && value == b.value;
128 }
129
130 bool operator!=(const ExprGprEqual& b) const {
131 return !operator==(b);
132 }
133
134 u32 gpr;
135 u32 value;
136};
137
121template <typename T, typename... Args> 138template <typename T, typename... Args>
122Expr MakeExpr(Args&&... args) { 139Expr MakeExpr(Args&&... args) {
123 static_assert(std::is_convertible_v<T, ExprData>); 140 static_assert(std::is_convertible_v<T, ExprData>);
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 338bab17c..447fb5c1d 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -410,7 +410,7 @@ public:
410 explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} 410 explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
411 411
412 explicit OperationNode(OperationCode code, Meta meta) 412 explicit OperationNode(OperationCode code, Meta meta)
413 : OperationNode(code, meta, std::vector<Node>{}) {} 413 : OperationNode(code, std::move(meta), std::vector<Node>{}) {}
414 414
415 explicit OperationNode(OperationCode code, std::vector<Node> operands) 415 explicit OperationNode(OperationCode code, std::vector<Node> operands)
416 : OperationNode(code, Meta{}, std::move(operands)) {} 416 : OperationNode(code, Meta{}, std::move(operands)) {}
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index c1f2b88c8..1d9825c76 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -2,8 +2,9 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm>
6#include <array>
5#include <cmath> 7#include <cmath>
6#include <unordered_map>
7 8
8#include "common/assert.h" 9#include "common/assert.h"
9#include "common/common_types.h" 10#include "common/common_types.h"
@@ -22,10 +23,9 @@ using Tegra::Shader::PredCondition;
22using Tegra::Shader::PredOperation; 23using Tegra::Shader::PredOperation;
23using Tegra::Shader::Register; 24using Tegra::Shader::Register;
24 25
25ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, 26ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
26 CompilerSettings settings) 27 ConstBufferLocker& locker)
27 : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, 28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
28 program_manager{true, true}, settings{settings} {
29 Decode(); 29 Decode();
30} 30}
31 31
@@ -271,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
271} 271}
272 272
273Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { 273Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
274 const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { 274 static constexpr std::array comparison_table{
275 {PredCondition::LessThan, OperationCode::LogicalFLessThan}, 275 std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan},
276 {PredCondition::Equal, OperationCode::LogicalFEqual}, 276 std::pair{PredCondition::Equal, OperationCode::LogicalFEqual},
277 {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, 277 std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
278 {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, 278 std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
279 {PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, 279 std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
280 {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, 280 std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
281 {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, 281 std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
282 {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, 282 std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
283 {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, 283 std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
284 {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, 284 std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
285 {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}}; 285 std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual},
286 286 };
287 const auto comparison{PredicateComparisonTable.find(condition)}; 287
288 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 288 const auto comparison =
289 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
290 [condition](const auto entry) { return condition == entry.first; });
291 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
289 "Unknown predicate comparison operation"); 292 "Unknown predicate comparison operation");
290 293
291 Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); 294 Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
@@ -306,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
306 309
307Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, 310Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
308 Node op_b) { 311 Node op_b) {
309 const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { 312 static constexpr std::array comparison_table{
310 {PredCondition::LessThan, OperationCode::LogicalILessThan}, 313 std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan},
311 {PredCondition::Equal, OperationCode::LogicalIEqual}, 314 std::pair{PredCondition::Equal, OperationCode::LogicalIEqual},
312 {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, 315 std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
313 {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, 316 std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
314 {PredCondition::NotEqual, OperationCode::LogicalINotEqual}, 317 std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual},
315 {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, 318 std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
316 {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, 319 std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
317 {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, 320 std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
318 {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, 321 std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
319 {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, 322 std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
320 {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}}; 323 std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
321 324 };
322 const auto comparison{PredicateComparisonTable.find(condition)}; 325
323 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 326 const auto comparison =
327 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
328 [condition](const auto entry) { return condition == entry.first; });
329 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
324 "Unknown predicate comparison operation"); 330 "Unknown predicate comparison operation");
325 331
326 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), 332 Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
@@ -337,36 +343,43 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
337 343
338Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, 344Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
339 Node op_b) { 345 Node op_b) {
340 const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { 346 static constexpr std::array comparison_table{
341 {PredCondition::LessThan, OperationCode::Logical2HLessThan}, 347 std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan},
342 {PredCondition::Equal, OperationCode::Logical2HEqual}, 348 std::pair{PredCondition::Equal, OperationCode::Logical2HEqual},
343 {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, 349 std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
344 {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, 350 std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
345 {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, 351 std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
346 {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, 352 std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
347 {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, 353 std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
348 {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, 354 std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
349 {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, 355 std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
350 {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, 356 std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
351 {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; 357 std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan},
352 358 };
353 const auto comparison{PredicateComparisonTable.find(condition)}; 359
354 UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), 360 const auto comparison =
361 std::find_if(comparison_table.cbegin(), comparison_table.cend(),
362 [condition](const auto entry) { return condition == entry.first; });
363 UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
355 "Unknown predicate comparison operation"); 364 "Unknown predicate comparison operation");
356 365
357 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); 366 return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
358} 367}
359 368
360OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { 369OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
361 const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { 370 static constexpr std::array operation_table{
362 {PredOperation::And, OperationCode::LogicalAnd}, 371 OperationCode::LogicalAnd,
363 {PredOperation::Or, OperationCode::LogicalOr}, 372 OperationCode::LogicalOr,
364 {PredOperation::Xor, OperationCode::LogicalXor}, 373 OperationCode::LogicalXor,
365 }; 374 };
366 375
367 const auto op = PredicateOperationTable.find(operation); 376 const auto index = static_cast<std::size_t>(operation);
368 UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation"); 377 if (index >= operation_table.size()) {
369 return op->second; 378 UNIMPLEMENTED_MSG("Unknown predicate operation.");
379 return {};
380 }
381
382 return operation_table[index];
370} 383}
371 384
372Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { 385Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 105981d67..1fd44bde1 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -17,6 +17,7 @@
17#include "video_core/engines/shader_header.h" 17#include "video_core/engines/shader_header.h"
18#include "video_core/shader/ast.h" 18#include "video_core/shader/ast.h"
19#include "video_core/shader/compiler_settings.h" 19#include "video_core/shader/compiler_settings.h"
20#include "video_core/shader/const_buffer_locker.h"
20#include "video_core/shader/node.h" 21#include "video_core/shader/node.h"
21 22
22namespace VideoCommon::Shader { 23namespace VideoCommon::Shader {
@@ -66,8 +67,8 @@ struct GlobalMemoryUsage {
66 67
67class ShaderIR final { 68class ShaderIR final {
68public: 69public:
69 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, 70 explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
70 CompilerSettings settings); 71 ConstBufferLocker& locker);
71 ~ShaderIR(); 72 ~ShaderIR();
72 73
73 const std::map<u32, NodeBlock>& GetBasicBlocks() const { 74 const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -172,6 +173,13 @@ public:
172 173
173private: 174private:
174 friend class ASTDecoder; 175 friend class ASTDecoder;
176
177 struct SamplerInfo {
178 Tegra::Shader::TextureType type;
179 bool is_array;
180 bool is_shadow;
181 };
182
175 void Decode(); 183 void Decode();
176 184
177 NodeBlock DecodeRange(u32 begin, u32 end); 185 NodeBlock DecodeRange(u32 begin, u32 end);
@@ -296,12 +304,11 @@ private:
296 304
297 /// Accesses a texture sampler 305 /// Accesses a texture sampler
298 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, 306 const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
299 Tegra::Shader::TextureType type, bool is_array, bool is_shadow); 307 std::optional<SamplerInfo> sampler_info);
300 308
301 // Accesses a texture sampler for a bindless texture. 309 // Accesses a texture sampler for a bindless texture.
302 const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, 310 const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
303 Tegra::Shader::TextureType type, bool is_array, 311 std::optional<SamplerInfo> sampler_info);
304 bool is_shadow);
305 312
306 /// Accesses an image. 313 /// Accesses an image.
307 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); 314 Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -322,7 +329,7 @@ private:
322 const Node4& components); 329 const Node4& components);
323 330
324 void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 331 void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
325 const Node4& components); 332 const Node4& components, bool ignore_mask = false);
326 void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, 333 void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
327 const Node4& components); 334 const Node4& components);
328 335
@@ -371,12 +378,15 @@ private:
371 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, 378 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
372 s64 cursor) const; 379 s64 cursor) const;
373 380
374 std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( 381 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
375 NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); 382 Tegra::Shader::Instruction instr,
383 bool is_write);
376 384
377 const ProgramCode& program_code; 385 const ProgramCode& program_code;
378 const u32 main_offset; 386 const u32 main_offset;
379 const std::size_t program_size; 387 const CompilerSettings settings;
388 ConstBufferLocker& locker;
389
380 bool decompiled{}; 390 bool decompiled{};
381 bool disable_flow_stack{}; 391 bool disable_flow_stack{};
382 392
@@ -385,8 +395,7 @@ private:
385 395
386 std::map<u32, NodeBlock> basic_blocks; 396 std::map<u32, NodeBlock> basic_blocks;
387 NodeBlock global_code; 397 NodeBlock global_code;
388 ASTManager program_manager; 398 ASTManager program_manager{true, true};
389 CompilerSettings settings{};
390 399
391 std::set<u32> used_registers; 400 std::set<u32> used_registers;
392 std::set<Tegra::Shader::Pred> used_predicates; 401 std::set<Tegra::Shader::Pred> used_predicates;
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 250afc6d6..9a3c05288 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -212,6 +212,14 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
212 break; 212 break;
213 } 213 }
214 break; 214 break;
215 case Tegra::Texture::TextureFormat::A4B4G4R4:
216 switch (component_type) {
217 case Tegra::Texture::ComponentType::UNORM:
218 return PixelFormat::R4G4B4A4U;
219 default:
220 break;
221 }
222 break;
215 case Tegra::Texture::TextureFormat::R8: 223 case Tegra::Texture::TextureFormat::R8:
216 switch (component_type) { 224 switch (component_type) {
217 case Tegra::Texture::ComponentType::UNORM: 225 case Tegra::Texture::ComponentType::UNORM:
@@ -252,6 +260,7 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
252 default: 260 default:
253 break; 261 break;
254 } 262 }
263 break;
255 case Tegra::Texture::TextureFormat::R32_G32_B32_A32: 264 case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
256 switch (component_type) { 265 switch (component_type) {
257 case Tegra::Texture::ComponentType::FLOAT: 266 case Tegra::Texture::ComponentType::FLOAT:
@@ -350,6 +359,16 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
350 return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5; 359 return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5;
351 case Tegra::Texture::TextureFormat::ASTC_2D_10X8: 360 case Tegra::Texture::TextureFormat::ASTC_2D_10X8:
352 return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8; 361 return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8;
362 case Tegra::Texture::TextureFormat::ASTC_2D_6X6:
363 return is_srgb ? PixelFormat::ASTC_2D_6X6_SRGB : PixelFormat::ASTC_2D_6X6;
364 case Tegra::Texture::TextureFormat::ASTC_2D_10X10:
365 return is_srgb ? PixelFormat::ASTC_2D_10X10_SRGB : PixelFormat::ASTC_2D_10X10;
366 case Tegra::Texture::TextureFormat::ASTC_2D_12X12:
367 return is_srgb ? PixelFormat::ASTC_2D_12X12_SRGB : PixelFormat::ASTC_2D_12X12;
368 case Tegra::Texture::TextureFormat::ASTC_2D_8X6:
369 return is_srgb ? PixelFormat::ASTC_2D_8X6_SRGB : PixelFormat::ASTC_2D_8X6;
370 case Tegra::Texture::TextureFormat::ASTC_2D_6X5:
371 return is_srgb ? PixelFormat::ASTC_2D_6X5_SRGB : PixelFormat::ASTC_2D_6X5;
353 case Tegra::Texture::TextureFormat::R16_G16: 372 case Tegra::Texture::TextureFormat::R16_G16:
354 switch (component_type) { 373 switch (component_type) {
355 case Tegra::Texture::ComponentType::FLOAT: 374 case Tegra::Texture::ComponentType::FLOAT:
@@ -510,6 +529,16 @@ bool IsPixelFormatASTC(PixelFormat format) {
510 case PixelFormat::ASTC_2D_8X5_SRGB: 529 case PixelFormat::ASTC_2D_8X5_SRGB:
511 case PixelFormat::ASTC_2D_10X8: 530 case PixelFormat::ASTC_2D_10X8:
512 case PixelFormat::ASTC_2D_10X8_SRGB: 531 case PixelFormat::ASTC_2D_10X8_SRGB:
532 case PixelFormat::ASTC_2D_6X6:
533 case PixelFormat::ASTC_2D_6X6_SRGB:
534 case PixelFormat::ASTC_2D_10X10:
535 case PixelFormat::ASTC_2D_10X10_SRGB:
536 case PixelFormat::ASTC_2D_12X12:
537 case PixelFormat::ASTC_2D_12X12_SRGB:
538 case PixelFormat::ASTC_2D_8X6:
539 case PixelFormat::ASTC_2D_8X6_SRGB:
540 case PixelFormat::ASTC_2D_6X5:
541 case PixelFormat::ASTC_2D_6X5_SRGB:
513 return true; 542 return true;
514 default: 543 default:
515 return false; 544 return false;
@@ -530,6 +559,11 @@ bool IsPixelFormatSRGB(PixelFormat format) {
530 case PixelFormat::ASTC_2D_5X4_SRGB: 559 case PixelFormat::ASTC_2D_5X4_SRGB:
531 case PixelFormat::ASTC_2D_5X5_SRGB: 560 case PixelFormat::ASTC_2D_5X5_SRGB:
532 case PixelFormat::ASTC_2D_10X8_SRGB: 561 case PixelFormat::ASTC_2D_10X8_SRGB:
562 case PixelFormat::ASTC_2D_6X6_SRGB:
563 case PixelFormat::ASTC_2D_10X10_SRGB:
564 case PixelFormat::ASTC_2D_12X12_SRGB:
565 case PixelFormat::ASTC_2D_8X6_SRGB:
566 case PixelFormat::ASTC_2D_6X5_SRGB:
533 return true; 567 return true;
534 default: 568 default:
535 return false; 569 return false;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 1e1c432a5..97668f802 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -67,27 +67,38 @@ enum class PixelFormat {
67 DXT23_SRGB = 49, 67 DXT23_SRGB = 49,
68 DXT45_SRGB = 50, 68 DXT45_SRGB = 50,
69 BC7U_SRGB = 51, 69 BC7U_SRGB = 51,
70 ASTC_2D_4X4_SRGB = 52, 70 R4G4B4A4U = 52,
71 ASTC_2D_8X8_SRGB = 53, 71 ASTC_2D_4X4_SRGB = 53,
72 ASTC_2D_8X5_SRGB = 54, 72 ASTC_2D_8X8_SRGB = 54,
73 ASTC_2D_5X4_SRGB = 55, 73 ASTC_2D_8X5_SRGB = 55,
74 ASTC_2D_5X5 = 56, 74 ASTC_2D_5X4_SRGB = 56,
75 ASTC_2D_5X5_SRGB = 57, 75 ASTC_2D_5X5 = 57,
76 ASTC_2D_10X8 = 58, 76 ASTC_2D_5X5_SRGB = 58,
77 ASTC_2D_10X8_SRGB = 59, 77 ASTC_2D_10X8 = 59,
78 ASTC_2D_10X8_SRGB = 60,
79 ASTC_2D_6X6 = 61,
80 ASTC_2D_6X6_SRGB = 62,
81 ASTC_2D_10X10 = 63,
82 ASTC_2D_10X10_SRGB = 64,
83 ASTC_2D_12X12 = 65,
84 ASTC_2D_12X12_SRGB = 66,
85 ASTC_2D_8X6 = 67,
86 ASTC_2D_8X6_SRGB = 68,
87 ASTC_2D_6X5 = 69,
88 ASTC_2D_6X5_SRGB = 70,
78 89
79 MaxColorFormat, 90 MaxColorFormat,
80 91
81 // Depth formats 92 // Depth formats
82 Z32F = 60, 93 Z32F = 71,
83 Z16 = 61, 94 Z16 = 72,
84 95
85 MaxDepthFormat, 96 MaxDepthFormat,
86 97
87 // DepthStencil formats 98 // DepthStencil formats
88 Z24S8 = 62, 99 Z24S8 = 73,
89 S8Z24 = 63, 100 S8Z24 = 74,
90 Z32FS8 = 64, 101 Z32FS8 = 75,
91 102
92 MaxDepthStencilFormat, 103 MaxDepthStencilFormat,
93 104
@@ -177,6 +188,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
177 2, // DXT23_SRGB 188 2, // DXT23_SRGB
178 2, // DXT45_SRGB 189 2, // DXT45_SRGB
179 2, // BC7U_SRGB 190 2, // BC7U_SRGB
191 0, // R4G4B4A4U
180 2, // ASTC_2D_4X4_SRGB 192 2, // ASTC_2D_4X4_SRGB
181 2, // ASTC_2D_8X8_SRGB 193 2, // ASTC_2D_8X8_SRGB
182 2, // ASTC_2D_8X5_SRGB 194 2, // ASTC_2D_8X5_SRGB
@@ -185,6 +197,16 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
185 2, // ASTC_2D_5X5_SRGB 197 2, // ASTC_2D_5X5_SRGB
186 2, // ASTC_2D_10X8 198 2, // ASTC_2D_10X8
187 2, // ASTC_2D_10X8_SRGB 199 2, // ASTC_2D_10X8_SRGB
200 2, // ASTC_2D_6X6
201 2, // ASTC_2D_6X6_SRGB
202 2, // ASTC_2D_10X10
203 2, // ASTC_2D_10X10_SRGB
204 2, // ASTC_2D_12X12
205 2, // ASTC_2D_12X12_SRGB
206 2, // ASTC_2D_8X6
207 2, // ASTC_2D_8X6_SRGB
208 2, // ASTC_2D_6X5
209 2, // ASTC_2D_6X5_SRGB
188 0, // Z32F 210 0, // Z32F
189 0, // Z16 211 0, // Z16
190 0, // Z24S8 212 0, // Z24S8
@@ -261,6 +283,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
261 4, // DXT23_SRGB 283 4, // DXT23_SRGB
262 4, // DXT45_SRGB 284 4, // DXT45_SRGB
263 4, // BC7U_SRGB 285 4, // BC7U_SRGB
286 1, // R4G4B4A4U
264 4, // ASTC_2D_4X4_SRGB 287 4, // ASTC_2D_4X4_SRGB
265 8, // ASTC_2D_8X8_SRGB 288 8, // ASTC_2D_8X8_SRGB
266 8, // ASTC_2D_8X5_SRGB 289 8, // ASTC_2D_8X5_SRGB
@@ -269,6 +292,16 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
269 5, // ASTC_2D_5X5_SRGB 292 5, // ASTC_2D_5X5_SRGB
270 10, // ASTC_2D_10X8 293 10, // ASTC_2D_10X8
271 10, // ASTC_2D_10X8_SRGB 294 10, // ASTC_2D_10X8_SRGB
295 6, // ASTC_2D_6X6
296 6, // ASTC_2D_6X6_SRGB
297 10, // ASTC_2D_10X10
298 10, // ASTC_2D_10X10_SRGB
299 12, // ASTC_2D_12X12
300 12, // ASTC_2D_12X12_SRGB
301 8, // ASTC_2D_8X6
302 8, // ASTC_2D_8X6_SRGB
303 6, // ASTC_2D_6X5
304 6, // ASTC_2D_6X5_SRGB
272 1, // Z32F 305 1, // Z32F
273 1, // Z16 306 1, // Z16
274 1, // Z24S8 307 1, // Z24S8
@@ -285,71 +318,82 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
285} 318}
286 319
287constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 320constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
288 1, // ABGR8U 321 1, // ABGR8U
289 1, // ABGR8S 322 1, // ABGR8S
290 1, // ABGR8UI 323 1, // ABGR8UI
291 1, // B5G6R5U 324 1, // B5G6R5U
292 1, // A2B10G10R10U 325 1, // A2B10G10R10U
293 1, // A1B5G5R5U 326 1, // A1B5G5R5U
294 1, // R8U 327 1, // R8U
295 1, // R8UI 328 1, // R8UI
296 1, // RGBA16F 329 1, // RGBA16F
297 1, // RGBA16U 330 1, // RGBA16U
298 1, // RGBA16UI 331 1, // RGBA16UI
299 1, // R11FG11FB10F 332 1, // R11FG11FB10F
300 1, // RGBA32UI 333 1, // RGBA32UI
301 4, // DXT1 334 4, // DXT1
302 4, // DXT23 335 4, // DXT23
303 4, // DXT45 336 4, // DXT45
304 4, // DXN1 337 4, // DXN1
305 4, // DXN2UNORM 338 4, // DXN2UNORM
306 4, // DXN2SNORM 339 4, // DXN2SNORM
307 4, // BC7U 340 4, // BC7U
308 4, // BC6H_UF16 341 4, // BC6H_UF16
309 4, // BC6H_SF16 342 4, // BC6H_SF16
310 4, // ASTC_2D_4X4 343 4, // ASTC_2D_4X4
311 1, // BGRA8 344 1, // BGRA8
312 1, // RGBA32F 345 1, // RGBA32F
313 1, // RG32F 346 1, // RG32F
314 1, // R32F 347 1, // R32F
315 1, // R16F 348 1, // R16F
316 1, // R16U 349 1, // R16U
317 1, // R16S 350 1, // R16S
318 1, // R16UI 351 1, // R16UI
319 1, // R16I 352 1, // R16I
320 1, // RG16 353 1, // RG16
321 1, // RG16F 354 1, // RG16F
322 1, // RG16UI 355 1, // RG16UI
323 1, // RG16I 356 1, // RG16I
324 1, // RG16S 357 1, // RG16S
325 1, // RGB32F 358 1, // RGB32F
326 1, // RGBA8_SRGB 359 1, // RGBA8_SRGB
327 1, // RG8U 360 1, // RG8U
328 1, // RG8S 361 1, // RG8S
329 1, // RG32UI 362 1, // RG32UI
330 1, // RGBX16F 363 1, // RGBX16F
331 1, // R32UI 364 1, // R32UI
332 8, // ASTC_2D_8X8 365 8, // ASTC_2D_8X8
333 5, // ASTC_2D_8X5 366 5, // ASTC_2D_8X5
334 4, // ASTC_2D_5X4 367 4, // ASTC_2D_5X4
335 1, // BGRA8_SRGB 368 1, // BGRA8_SRGB
336 4, // DXT1_SRGB 369 4, // DXT1_SRGB
337 4, // DXT23_SRGB 370 4, // DXT23_SRGB
338 4, // DXT45_SRGB 371 4, // DXT45_SRGB
339 4, // BC7U_SRGB 372 4, // BC7U_SRGB
340 4, // ASTC_2D_4X4_SRGB 373 1, // R4G4B4A4U
341 8, // ASTC_2D_8X8_SRGB 374 4, // ASTC_2D_4X4_SRGB
342 5, // ASTC_2D_8X5_SRGB 375 8, // ASTC_2D_8X8_SRGB
343 4, // ASTC_2D_5X4_SRGB 376 5, // ASTC_2D_8X5_SRGB
344 5, // ASTC_2D_5X5 377 4, // ASTC_2D_5X4_SRGB
345 5, // ASTC_2D_5X5_SRGB 378 5, // ASTC_2D_5X5
346 8, // ASTC_2D_10X8 379 5, // ASTC_2D_5X5_SRGB
347 8, // ASTC_2D_10X8_SRGB 380 8, // ASTC_2D_10X8
348 1, // Z32F 381 8, // ASTC_2D_10X8_SRGB
349 1, // Z16 382 6, // ASTC_2D_6X6
350 1, // Z24S8 383 6, // ASTC_2D_6X6_SRGB
351 1, // S8Z24 384 10, // ASTC_2D_10X10
352 1, // Z32FS8 385 10, // ASTC_2D_10X10_SRGB
386 12, // ASTC_2D_12X12
387 12, // ASTC_2D_12X12_SRGB
388 6, // ASTC_2D_8X6
389 6, // ASTC_2D_8X6_SRGB
390 5, // ASTC_2D_6X5
391 5, // ASTC_2D_6X5_SRGB
392 1, // Z32F
393 1, // Z16
394 1, // Z24S8
395 1, // S8Z24
396 1, // Z32FS8
353}}; 397}};
354 398
355static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { 399static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
@@ -413,6 +457,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
413 128, // DXT23_SRGB 457 128, // DXT23_SRGB
414 128, // DXT45_SRGB 458 128, // DXT45_SRGB
415 128, // BC7U 459 128, // BC7U
460 16, // R4G4B4A4U
416 128, // ASTC_2D_4X4_SRGB 461 128, // ASTC_2D_4X4_SRGB
417 128, // ASTC_2D_8X8_SRGB 462 128, // ASTC_2D_8X8_SRGB
418 128, // ASTC_2D_8X5_SRGB 463 128, // ASTC_2D_8X5_SRGB
@@ -421,6 +466,16 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
421 128, // ASTC_2D_5X5_SRGB 466 128, // ASTC_2D_5X5_SRGB
422 128, // ASTC_2D_10X8 467 128, // ASTC_2D_10X8
423 128, // ASTC_2D_10X8_SRGB 468 128, // ASTC_2D_10X8_SRGB
469 128, // ASTC_2D_6X6
470 128, // ASTC_2D_6X6_SRGB
471 128, // ASTC_2D_10X10
472 128, // ASTC_2D_10X10_SRGB
473 128, // ASTC_2D_12X12
474 128, // ASTC_2D_12X12_SRGB
475 128, // ASTC_2D_8X6
476 128, // ASTC_2D_8X6_SRGB
477 128, // ASTC_2D_6X5
478 128, // ASTC_2D_6X5_SRGB
424 32, // Z32F 479 32, // Z32F
425 16, // Z16 480 16, // Z16
426 32, // Z24S8 481 32, // Z24S8
@@ -504,6 +559,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
504 SurfaceCompression::Compressed, // DXT23_SRGB 559 SurfaceCompression::Compressed, // DXT23_SRGB
505 SurfaceCompression::Compressed, // DXT45_SRGB 560 SurfaceCompression::Compressed, // DXT45_SRGB
506 SurfaceCompression::Compressed, // BC7U_SRGB 561 SurfaceCompression::Compressed, // BC7U_SRGB
562 SurfaceCompression::None, // R4G4B4A4U
507 SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB 563 SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB
508 SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB 564 SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB
509 SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB 565 SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB
@@ -512,6 +568,16 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
512 SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB 568 SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB
513 SurfaceCompression::Converted, // ASTC_2D_10X8 569 SurfaceCompression::Converted, // ASTC_2D_10X8
514 SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB 570 SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB
571 SurfaceCompression::Converted, // ASTC_2D_6X6
572 SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB
573 SurfaceCompression::Converted, // ASTC_2D_10X10
574 SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB
575 SurfaceCompression::Converted, // ASTC_2D_12X12
576 SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB
577 SurfaceCompression::Converted, // ASTC_2D_8X6
578 SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB
579 SurfaceCompression::Converted, // ASTC_2D_6X5
580 SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB
515 SurfaceCompression::None, // Z32F 581 SurfaceCompression::None, // Z32F
516 SurfaceCompression::None, // Z16 582 SurfaceCompression::None, // Z16
517 SurfaceCompression::None, // Z24S8 583 SurfaceCompression::None, // Z24S8
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 683c49207..829268b4c 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -2,6 +2,7 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "common/algorithm.h"
5#include "common/assert.h" 6#include "common/assert.h"
6#include "common/common_types.h" 7#include "common/common_types.h"
7#include "common/microprofile.h" 8#include "common/microprofile.h"
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 5e497e49f..1bed82898 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -4,12 +4,11 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <algorithm> 7#include <optional>
8#include <tuple>
8#include <unordered_map> 9#include <unordered_map>
9#include <vector> 10#include <vector>
10 11
11#include "common/assert.h"
12#include "common/binary_find.h"
13#include "common/common_types.h" 12#include "common/common_types.h"
14#include "video_core/gpu.h" 13#include "video_core/gpu.h"
15#include "video_core/morton.h" 14#include "video_core/morton.h"
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index ca2da8f97..6a92b22d3 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -62,10 +62,10 @@ public:
62 } 62 }
63 } 63 }
64 64
65 /*** 65 /**
66 * `Guard` guarantees that rendertargets don't unregister themselves if the 66 * Guarantees that rendertargets don't unregister themselves if the
67 * collide. Protection is currently only done on 3D slices. 67 * collide. Protection is currently only done on 3D slices.
68 ***/ 68 */
69 void GuardRenderTargets(bool new_guard) { 69 void GuardRenderTargets(bool new_guard) {
70 guard_render_targets = new_guard; 70 guard_render_targets = new_guard;
71 } 71 }
@@ -287,7 +287,7 @@ protected:
287 const Tegra::Engines::Fermi2D::Config& copy_config) = 0; 287 const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
288 288
289 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture 289 // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
290 // and reading it from a sepparate buffer. 290 // and reading it from a separate buffer.
291 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; 291 virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
292 292
293 void ManageRenderTargetUnregister(TSurface& surface) { 293 void ManageRenderTargetUnregister(TSurface& surface) {
@@ -386,12 +386,13 @@ private:
386 }; 386 };
387 387
388 /** 388 /**
389 * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. 389 * Takes care of selecting a proper strategy to deal with a texture recycle.
390 * @param overlaps, the overlapping surfaces registered in the cache. 390 *
391 * @param params, the paremeters on the new surface. 391 * @param overlaps The overlapping surfaces registered in the cache.
392 * @param gpu_addr, the starting address of the new surface. 392 * @param params The parameters on the new surface.
393 * @param untopological, tells the recycler that the texture has no way to match the overlaps 393 * @param gpu_addr The starting address of the new surface.
394 * due to topological reasons. 394 * @param untopological Indicates to the recycler that the texture has no way
395 * to match the overlaps due to topological reasons.
395 **/ 396 **/
396 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, 397 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
397 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 398 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
@@ -402,7 +403,7 @@ private:
402 if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { 403 if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
403 return RecycleStrategy::Flush; 404 return RecycleStrategy::Flush;
404 } 405 }
405 for (auto s : overlaps) { 406 for (const auto& s : overlaps) {
406 const auto& s_params = s->GetSurfaceParams(); 407 const auto& s_params = s->GetSurfaceParams();
407 if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { 408 if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
408 return RecycleStrategy::Flush; 409 return RecycleStrategy::Flush;
@@ -419,16 +420,19 @@ private:
419 } 420 }
420 421
421 /** 422 /**
422 * `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in 423 * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented
423 *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the 424 * strategies: Ignore and Flush.
424 *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the 425 *
425 *new surface from that data. 426 * - Ignore: Just unregisters all the overlaps and loads the new texture.
426 * @param overlaps, the overlapping surfaces registered in the cache. 427 * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
427 * @param params, the paremeters on the new surface. 428 *
428 * @param gpu_addr, the starting address of the new surface. 429 * @param overlaps The overlapping surfaces registered in the cache.
429 * @param preserve_contents, tells if the new surface should be loaded from meory or left blank 430 * @param params The parameters for the new surface.
430 * @param untopological, tells the recycler that the texture has no way to match the overlaps 431 * @param gpu_addr The starting address of the new surface.
431 * due to topological reasons. 432 * @param preserve_contents Indicates that the new surface should be loaded from memory or left
433 * blank.
434 * @param untopological Indicates to the recycler that the texture has no way to match the
435 * overlaps due to topological reasons.
432 **/ 436 **/
433 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 437 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
434 const SurfaceParams& params, const GPUVAddr gpu_addr, 438 const SurfaceParams& params, const GPUVAddr gpu_addr,
@@ -465,10 +469,12 @@ private:
465 } 469 }
466 470
467 /** 471 /**
468 * `RebuildSurface` this method takes a single surface and recreates into another that 472 * Takes a single surface and recreates into another that may differ in
469 * may differ in format, target or width alingment. 473 * format, target or width alignment.
470 * @param current_surface, the registered surface in the cache which we want to convert. 474 *
471 * @param params, the new surface params which we'll use to recreate the surface. 475 * @param current_surface The registered surface in the cache which we want to convert.
476 * @param params The new surface params which we'll use to recreate the surface.
477 * @param is_render Whether or not the surface is a render target.
472 **/ 478 **/
473 std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, 479 std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
474 bool is_render) { 480 bool is_render) {
@@ -502,12 +508,14 @@ private:
502 } 508 }
503 509
504 /** 510 /**
505 * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's 511 * Takes a single surface and checks with the new surface's params if it's an exact
506 * params if it's an exact match, we return the main view of the registered surface. If it's 512 * match, we return the main view of the registered surface. If its formats don't
507 * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats 513 * match, we rebuild the surface. We call this last method a `Mirage`. If formats
508 * match but the targets don't, we create an overview View of the registered surface. 514 * match but the targets don't, we create an overview View of the registered surface.
509 * @param current_surface, the registered surface in the cache which we want to convert. 515 *
510 * @param params, the new surface params which we want to check. 516 * @param current_surface The registered surface in the cache which we want to convert.
517 * @param params The new surface params which we want to check.
518 * @param is_render Whether or not the surface is a render target.
511 **/ 519 **/
512 std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, 520 std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
513 const SurfaceParams& params, bool is_render) { 521 const SurfaceParams& params, bool is_render) {
@@ -529,13 +537,14 @@ private:
529 } 537 }
530 538
531 /** 539 /**
532 * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface 540 * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
533 * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps 541 * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
534 * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface 542 * of the new surface, if they all match we end up recreating a surface for them,
535 * for them, else we return nothing. 543 * else we return nothing.
536 * @param overlaps, the overlapping surfaces registered in the cache. 544 *
537 * @param params, the paremeters on the new surface. 545 * @param overlaps The overlapping surfaces registered in the cache.
538 * @param gpu_addr, the starting address of the new surface. 546 * @param params The parameters on the new surface.
547 * @param gpu_addr The starting address of the new surface.
539 **/ 548 **/
540 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, 549 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
541 const SurfaceParams& params, 550 const SurfaceParams& params,
@@ -575,7 +584,7 @@ private:
575 } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { 584 } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
576 return {}; 585 return {};
577 } 586 }
578 for (auto surface : overlaps) { 587 for (const auto& surface : overlaps) {
579 Unregister(surface); 588 Unregister(surface);
580 } 589 }
581 new_surface->MarkAsModified(modified, Tick()); 590 new_surface->MarkAsModified(modified, Tick());
@@ -584,19 +593,27 @@ private:
584 } 593 }
585 594
586 /** 595 /**
587 * `GetSurface` gets the starting address and parameters of a candidate surface and tries 596 * Gets the starting address and parameters of a candidate surface and tries
588 * to find a matching surface within the cache. This is done in 3 big steps. The first is to 597 * to find a matching surface within the cache. This is done in 3 big steps:
589 * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. 598 *
590 * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from 599 * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
591 * memory else we move to step 3. Step 3 consists on figuring the relationship between the 600 *
592 * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many 601 * 2. Check if there are any overlaps at all, if there are none, we just load the texture from
593 * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the 602 * memory else we move to step 3.
594 * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to 603 *
595 * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface 604 * 3. Consists of figuring out the relationship between the candidate texture and the
596 * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. 605 * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
597 * @param gpu_addr, the starting address of the candidate surface. 606 * there's many, we just try to reconstruct a new surface out of them based on the
598 * @param params, the paremeters on the candidate surface. 607 * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
599 * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. 608 * have to check if the candidate is a view (layer/mipmap) of the overlap or if the
609 * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
610 * a new surface.
611 *
612 * @param gpu_addr The starting address of the candidate surface.
613 * @param params The parameters on the candidate surface.
614 * @param preserve_contents Indicates that the new surface should be loaded from memory or
615 * left blank.
616 * @param is_render Whether or not the surface is a render target.
600 **/ 617 **/
601 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, 618 std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
602 bool preserve_contents, bool is_render) { 619 bool preserve_contents, bool is_render) {
@@ -651,7 +668,7 @@ private:
651 // Step 3 668 // Step 3
652 // Now we need to figure the relationship between the texture and its overlaps 669 // Now we need to figure the relationship between the texture and its overlaps
653 // we do a topological test to ensure we can find some relationship. If it fails 670 // we do a topological test to ensure we can find some relationship. If it fails
654 // inmediatly recycle the texture 671 // immediately recycle the texture
655 for (const auto& surface : overlaps) { 672 for (const auto& surface : overlaps) {
656 const auto topological_result = surface->MatchesTopology(params); 673 const auto topological_result = surface->MatchesTopology(params);
657 if (topological_result != MatchTopologyResult::FullMatch) { 674 if (topological_result != MatchTopologyResult::FullMatch) {
@@ -720,12 +737,13 @@ private:
720 } 737 }
721 738
722 /** 739 /**
723 * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries 740 * Gets the starting address and parameters of a candidate surface and tries to find a
724 * to find a matching surface within the cache that's similar to it. If there are many textures 741 * matching surface within the cache that's similar to it. If there are many textures
725 * or the texture found if entirely incompatible, it will fail. If no texture is found, the 742 * or the texture found if entirely incompatible, it will fail. If no texture is found, the
726 * blit will be unsuccessful. 743 * blit will be unsuccessful.
727 * @param gpu_addr, the starting address of the candidate surface. 744 *
728 * @param params, the paremeters on the candidate surface. 745 * @param gpu_addr The starting address of the candidate surface.
746 * @param params The parameters on the candidate surface.
729 **/ 747 **/
730 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { 748 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
731 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; 749 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
@@ -777,11 +795,14 @@ private:
777 } 795 }
778 796
779 /** 797 /**
780 * `DeduceBestBlit` gets the a source and destination starting address and parameters, 798 * Gets the a source and destination starting address and parameters,
781 * and tries to deduce if they are supposed to be depth textures. If so, their 799 * and tries to deduce if they are supposed to be depth textures. If so, their
782 * parameters are modified and fixed into so. 800 * parameters are modified and fixed into so.
783 * @param gpu_addr, the starting address of the candidate surface. 801 *
784 * @param params, the parameters on the candidate surface. 802 * @param src_params The parameters of the candidate surface.
803 * @param dst_params The parameters of the destination surface.
804 * @param src_gpu_addr The starting address of the candidate surface.
805 * @param dst_gpu_addr The starting address of the destination surface.
785 **/ 806 **/
786 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, 807 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
787 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { 808 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index a9b8f69af..58b608a36 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -422,7 +422,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
422 TexelWeightParams params; 422 TexelWeightParams params;
423 423
424 // Read the entire block mode all at once 424 // Read the entire block mode all at once
425 uint16_t modeBits = strm.ReadBits(11); 425 uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11));
426 426
427 // Does this match the void extent block mode? 427 // Does this match the void extent block mode?
428 if ((modeBits & 0x01FF) == 0x1FC) { 428 if ((modeBits & 0x01FF) == 0x1FC) {
@@ -625,10 +625,10 @@ static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint
625 } 625 }
626 626
627 // Decode the RGBA components and renormalize them to the range [0, 255] 627 // Decode the RGBA components and renormalize them to the range [0, 255]
628 uint16_t r = strm.ReadBits(16); 628 uint16_t r = static_cast<uint16_t>(strm.ReadBits(16));
629 uint16_t g = strm.ReadBits(16); 629 uint16_t g = static_cast<uint16_t>(strm.ReadBits(16));
630 uint16_t b = strm.ReadBits(16); 630 uint16_t b = static_cast<uint16_t>(strm.ReadBits(16));
631 uint16_t a = strm.ReadBits(16); 631 uint16_t a = static_cast<uint16_t>(strm.ReadBits(16));
632 632
633 uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | 633 uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 |
634 (static_cast<uint32_t>(a) & 0xFF00) << 16; 634 (static_cast<uint32_t>(a) & 0xFF00) << 16;
@@ -681,9 +681,10 @@ protected:
681 681
682public: 682public:
683 Pixel() = default; 683 Pixel() = default;
684 Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8) 684 Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8)
685 : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, 685 : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)},
686 color{a, r, g, b} {} 686 color{static_cast<ChannelType>(a), static_cast<ChannelType>(r),
687 static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {}
687 688
688 // Changes the depth of each pixel. This scales the values to 689 // Changes the depth of each pixel. This scales the values to
689 // the appropriate bit depth by either truncating the least 690 // the appropriate bit depth by either truncating the least