diff options
79 files changed, 2346 insertions, 1477 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index 09eabe2c7..21e03ae98 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake | |||
| @@ -85,10 +85,12 @@ set(HASH_FILES | |||
| 85 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 85 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 86 | "${VIDEO_CORE}/shader/ast.cpp" | 86 | "${VIDEO_CORE}/shader/ast.cpp" |
| 87 | "${VIDEO_CORE}/shader/ast.h" | 87 | "${VIDEO_CORE}/shader/ast.h" |
| 88 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 89 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 90 | "${VIDEO_CORE}/shader/compiler_settings.cpp" | 88 | "${VIDEO_CORE}/shader/compiler_settings.cpp" |
| 91 | "${VIDEO_CORE}/shader/compiler_settings.h" | 89 | "${VIDEO_CORE}/shader/compiler_settings.h" |
| 90 | "${VIDEO_CORE}/shader/const_buffer_locker.cpp" | ||
| 91 | "${VIDEO_CORE}/shader/const_buffer_locker.h" | ||
| 92 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 93 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 92 | "${VIDEO_CORE}/shader/decode.cpp" | 94 | "${VIDEO_CORE}/shader/decode.cpp" |
| 93 | "${VIDEO_CORE}/shader/expr.cpp" | 95 | "${VIDEO_CORE}/shader/expr.cpp" |
| 94 | "${VIDEO_CORE}/shader/expr.h" | 96 | "${VIDEO_CORE}/shader/expr.h" |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 906c486fd..9c6f1c07c 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -74,10 +74,12 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 74 | "${VIDEO_CORE}/shader/decode/xmad.cpp" | 74 | "${VIDEO_CORE}/shader/decode/xmad.cpp" |
| 75 | "${VIDEO_CORE}/shader/ast.cpp" | 75 | "${VIDEO_CORE}/shader/ast.cpp" |
| 76 | "${VIDEO_CORE}/shader/ast.h" | 76 | "${VIDEO_CORE}/shader/ast.h" |
| 77 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 78 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 79 | "${VIDEO_CORE}/shader/compiler_settings.cpp" | 77 | "${VIDEO_CORE}/shader/compiler_settings.cpp" |
| 80 | "${VIDEO_CORE}/shader/compiler_settings.h" | 78 | "${VIDEO_CORE}/shader/compiler_settings.h" |
| 79 | "${VIDEO_CORE}/shader/const_buffer_locker.cpp" | ||
| 80 | "${VIDEO_CORE}/shader/const_buffer_locker.h" | ||
| 81 | "${VIDEO_CORE}/shader/control_flow.cpp" | ||
| 82 | "${VIDEO_CORE}/shader/control_flow.h" | ||
| 81 | "${VIDEO_CORE}/shader/decode.cpp" | 83 | "${VIDEO_CORE}/shader/decode.cpp" |
| 82 | "${VIDEO_CORE}/shader/expr.cpp" | 84 | "${VIDEO_CORE}/shader/expr.cpp" |
| 83 | "${VIDEO_CORE}/shader/expr.h" | 85 | "${VIDEO_CORE}/shader/expr.h" |
| @@ -95,11 +97,11 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 95 | ) | 97 | ) |
| 96 | 98 | ||
| 97 | add_library(common STATIC | 99 | add_library(common STATIC |
| 100 | algorithm.h | ||
| 98 | alignment.h | 101 | alignment.h |
| 99 | assert.h | 102 | assert.h |
| 100 | detached_tasks.cpp | 103 | detached_tasks.cpp |
| 101 | detached_tasks.h | 104 | detached_tasks.h |
| 102 | binary_find.h | ||
| 103 | bit_field.h | 105 | bit_field.h |
| 104 | bit_util.h | 106 | bit_util.h |
| 105 | cityhash.cpp | 107 | cityhash.cpp |
diff --git a/src/common/binary_find.h b/src/common/algorithm.h index 5cc523bf9..e21b1373c 100644 --- a/src/common/binary_find.h +++ b/src/common/algorithm.h | |||
| @@ -5,6 +5,12 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <functional> | ||
| 9 | |||
| 10 | // Algorithms that operate on iterators, much like the <algorithm> header. | ||
| 11 | // | ||
| 12 | // Note: If the algorithm is not general-purpose and/or doesn't operate on iterators, | ||
| 13 | // it should probably not be placed within this header. | ||
| 8 | 14 | ||
| 9 | namespace Common { | 15 | namespace Common { |
| 10 | 16 | ||
diff --git a/src/common/hash.h b/src/common/hash.h index 40194d1ee..ebd4125e2 100644 --- a/src/common/hash.h +++ b/src/common/hash.h | |||
| @@ -6,6 +6,8 @@ | |||
| 6 | 6 | ||
| 7 | #include <cstddef> | 7 | #include <cstddef> |
| 8 | #include <cstring> | 8 | #include <cstring> |
| 9 | #include <utility> | ||
| 10 | #include <boost/functional/hash.hpp> | ||
| 9 | #include "common/cityhash.h" | 11 | #include "common/cityhash.h" |
| 10 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 11 | 13 | ||
| @@ -68,4 +70,13 @@ struct HashableStruct { | |||
| 68 | } | 70 | } |
| 69 | }; | 71 | }; |
| 70 | 72 | ||
| 73 | struct PairHash { | ||
| 74 | template <class T1, class T2> | ||
| 75 | std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept { | ||
| 76 | std::size_t seed = std::hash<T1>()(pair.first); | ||
| 77 | boost::hash_combine(seed, std::hash<T2>()(pair.second)); | ||
| 78 | return seed; | ||
| 79 | } | ||
| 80 | }; | ||
| 81 | |||
| 71 | } // namespace Common | 82 | } // namespace Common |
diff --git a/src/core/core.cpp b/src/core/core.cpp index d79045eea..eba17218a 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -112,8 +112,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, | |||
| 112 | } | 112 | } |
| 113 | struct System::Impl { | 113 | struct System::Impl { |
| 114 | explicit Impl(System& system) | 114 | explicit Impl(System& system) |
| 115 | : kernel{system}, fs_controller{system}, cpu_core_manager{system}, | 115 | : kernel{system}, fs_controller{system}, cpu_core_manager{system}, reporter{system}, |
| 116 | applet_manager{system}, reporter{system} {} | 116 | applet_manager{system} {} |
| 117 | 117 | ||
| 118 | Cpu& CurrentCpuCore() { | 118 | Cpu& CurrentCpuCore() { |
| 119 | return cpu_core_manager.GetCurrentCore(); | 119 | return cpu_core_manager.GetCurrentCore(); |
| @@ -240,22 +240,27 @@ struct System::Impl { | |||
| 240 | } | 240 | } |
| 241 | 241 | ||
| 242 | void Shutdown() { | 242 | void Shutdown() { |
| 243 | // Log last frame performance stats | 243 | // Log last frame performance stats if game was loaded |
| 244 | const auto perf_results = GetAndResetPerfStats(); | 244 | if (perf_stats) { |
| 245 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", | 245 | const auto perf_results = GetAndResetPerfStats(); |
| 246 | perf_results.emulation_speed * 100.0); | 246 | telemetry_session->AddField(Telemetry::FieldType::Performance, |
| 247 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", | 247 | "Shutdown_EmulationSpeed", |
| 248 | perf_results.game_fps); | 248 | perf_results.emulation_speed * 100.0); |
| 249 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", | 249 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", |
| 250 | perf_results.frametime * 1000.0); | 250 | perf_results.game_fps); |
| 251 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS", | 251 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", |
| 252 | perf_stats->GetMeanFrametime()); | 252 | perf_results.frametime * 1000.0); |
| 253 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS", | ||
| 254 | perf_stats->GetMeanFrametime()); | ||
| 255 | } | ||
| 253 | 256 | ||
| 254 | lm_manager.Flush(); | 257 | lm_manager.Flush(); |
| 255 | 258 | ||
| 256 | is_powered_on = false; | 259 | is_powered_on = false; |
| 257 | exit_lock = false; | 260 | exit_lock = false; |
| 258 | 261 | ||
| 262 | gpu_core->WaitIdle(); | ||
| 263 | |||
| 259 | // Shutdown emulation session | 264 | // Shutdown emulation session |
| 260 | renderer.reset(); | 265 | renderer.reset(); |
| 261 | GDBStub::Shutdown(); | 266 | GDBStub::Shutdown(); |
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index 941ebc93a..3a32d5b41 100644 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp | |||
| @@ -1140,8 +1140,9 @@ void IApplicationFunctions::PopLaunchParameter(Kernel::HLERequestContext& ctx) { | |||
| 1140 | LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind)); | 1140 | LOG_DEBUG(Service_AM, "called, kind={:08X}", static_cast<u8>(kind)); |
| 1141 | 1141 | ||
| 1142 | if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) { | 1142 | if (kind == LaunchParameterKind::ApplicationSpecific && !launch_popped_application_specific) { |
| 1143 | const auto backend = BCAT::CreateBackendFromSettings( | 1143 | const auto backend = BCAT::CreateBackendFromSettings(system, [this](u64 tid) { |
| 1144 | [this](u64 tid) { return system.GetFileSystemController().GetBCATDirectory(tid); }); | 1144 | return system.GetFileSystemController().GetBCATDirectory(tid); |
| 1145 | }); | ||
| 1145 | const auto build_id_full = system.GetCurrentProcessBuildID(); | 1146 | const auto build_id_full = system.GetCurrentProcessBuildID(); |
| 1146 | u64 build_id{}; | 1147 | u64 build_id{}; |
| 1147 | std::memcpy(&build_id, build_id_full.data(), sizeof(u64)); | 1148 | std::memcpy(&build_id, build_id_full.data(), sizeof(u64)); |
diff --git a/src/core/hle/service/apm/controller.cpp b/src/core/hle/service/apm/controller.cpp index 073d0f6fa..25a886238 100644 --- a/src/core/hle/service/apm/controller.cpp +++ b/src/core/hle/service/apm/controller.cpp | |||
| @@ -2,6 +2,10 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <utility> | ||
| 8 | |||
| 5 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 6 | #include "core/core_timing.h" | 10 | #include "core/core_timing.h" |
| 7 | #include "core/hle/service/apm/controller.h" | 11 | #include "core/hle/service/apm/controller.h" |
| @@ -9,8 +13,7 @@ | |||
| 9 | 13 | ||
| 10 | namespace Service::APM { | 14 | namespace Service::APM { |
| 11 | 15 | ||
| 12 | constexpr PerformanceConfiguration DEFAULT_PERFORMANCE_CONFIGURATION = | 16 | constexpr auto DEFAULT_PERFORMANCE_CONFIGURATION = PerformanceConfiguration::Config7; |
| 13 | PerformanceConfiguration::Config7; | ||
| 14 | 17 | ||
| 15 | Controller::Controller(Core::Timing::CoreTiming& core_timing) | 18 | Controller::Controller(Core::Timing::CoreTiming& core_timing) |
| 16 | : core_timing{core_timing}, configs{ | 19 | : core_timing{core_timing}, configs{ |
| @@ -22,18 +25,35 @@ Controller::~Controller() = default; | |||
| 22 | 25 | ||
| 23 | void Controller::SetPerformanceConfiguration(PerformanceMode mode, | 26 | void Controller::SetPerformanceConfiguration(PerformanceMode mode, |
| 24 | PerformanceConfiguration config) { | 27 | PerformanceConfiguration config) { |
| 25 | static const std::map<PerformanceConfiguration, u32> PCONFIG_TO_SPEED_MAP{ | 28 | static constexpr std::array<std::pair<PerformanceConfiguration, u32>, 16> config_to_speed{{ |
| 26 | {PerformanceConfiguration::Config1, 1020}, {PerformanceConfiguration::Config2, 1020}, | 29 | {PerformanceConfiguration::Config1, 1020}, |
| 27 | {PerformanceConfiguration::Config3, 1224}, {PerformanceConfiguration::Config4, 1020}, | 30 | {PerformanceConfiguration::Config2, 1020}, |
| 28 | {PerformanceConfiguration::Config5, 1020}, {PerformanceConfiguration::Config6, 1224}, | 31 | {PerformanceConfiguration::Config3, 1224}, |
| 29 | {PerformanceConfiguration::Config7, 1020}, {PerformanceConfiguration::Config8, 1020}, | 32 | {PerformanceConfiguration::Config4, 1020}, |
| 30 | {PerformanceConfiguration::Config9, 1020}, {PerformanceConfiguration::Config10, 1020}, | 33 | {PerformanceConfiguration::Config5, 1020}, |
| 31 | {PerformanceConfiguration::Config11, 1020}, {PerformanceConfiguration::Config12, 1020}, | 34 | {PerformanceConfiguration::Config6, 1224}, |
| 32 | {PerformanceConfiguration::Config13, 1785}, {PerformanceConfiguration::Config14, 1785}, | 35 | {PerformanceConfiguration::Config7, 1020}, |
| 33 | {PerformanceConfiguration::Config15, 1020}, {PerformanceConfiguration::Config16, 1020}, | 36 | {PerformanceConfiguration::Config8, 1020}, |
| 34 | }; | 37 | {PerformanceConfiguration::Config9, 1020}, |
| 35 | 38 | {PerformanceConfiguration::Config10, 1020}, | |
| 36 | SetClockSpeed(PCONFIG_TO_SPEED_MAP.find(config)->second); | 39 | {PerformanceConfiguration::Config11, 1020}, |
| 40 | {PerformanceConfiguration::Config12, 1020}, | ||
| 41 | {PerformanceConfiguration::Config13, 1785}, | ||
| 42 | {PerformanceConfiguration::Config14, 1785}, | ||
| 43 | {PerformanceConfiguration::Config15, 1020}, | ||
| 44 | {PerformanceConfiguration::Config16, 1020}, | ||
| 45 | }}; | ||
| 46 | |||
| 47 | const auto iter = std::find_if(config_to_speed.cbegin(), config_to_speed.cend(), | ||
| 48 | [config](const auto& entry) { return entry.first == config; }); | ||
| 49 | |||
| 50 | if (iter == config_to_speed.cend()) { | ||
| 51 | LOG_ERROR(Service_APM, "Invalid performance configuration value provided: {}", | ||
| 52 | static_cast<u32>(config)); | ||
| 53 | return; | ||
| 54 | } | ||
| 55 | |||
| 56 | SetClockSpeed(iter->second); | ||
| 37 | configs.insert_or_assign(mode, config); | 57 | configs.insert_or_assign(mode, config); |
| 38 | } | 58 | } |
| 39 | 59 | ||
| @@ -48,7 +68,7 @@ void Controller::SetFromCpuBoostMode(CpuBoostMode mode) { | |||
| 48 | BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode))); | 68 | BOOST_MODE_TO_CONFIG_MAP.at(static_cast<u32>(mode))); |
| 49 | } | 69 | } |
| 50 | 70 | ||
| 51 | PerformanceMode Controller::GetCurrentPerformanceMode() { | 71 | PerformanceMode Controller::GetCurrentPerformanceMode() const { |
| 52 | return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld; | 72 | return Settings::values.use_docked_mode ? PerformanceMode::Docked : PerformanceMode::Handheld; |
| 53 | } | 73 | } |
| 54 | 74 | ||
diff --git a/src/core/hle/service/apm/controller.h b/src/core/hle/service/apm/controller.h index 454caa6eb..af0c4cd34 100644 --- a/src/core/hle/service/apm/controller.h +++ b/src/core/hle/service/apm/controller.h | |||
| @@ -56,7 +56,7 @@ public: | |||
| 56 | void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config); | 56 | void SetPerformanceConfiguration(PerformanceMode mode, PerformanceConfiguration config); |
| 57 | void SetFromCpuBoostMode(CpuBoostMode mode); | 57 | void SetFromCpuBoostMode(CpuBoostMode mode); |
| 58 | 58 | ||
| 59 | PerformanceMode GetCurrentPerformanceMode(); | 59 | PerformanceMode GetCurrentPerformanceMode() const; |
| 60 | PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode); | 60 | PerformanceConfiguration GetCurrentPerformanceConfiguration(PerformanceMode mode); |
| 61 | 61 | ||
| 62 | private: | 62 | private: |
diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 9d6946bc5..b86fda29a 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp | |||
| @@ -10,8 +10,8 @@ | |||
| 10 | 10 | ||
| 11 | namespace Service::BCAT { | 11 | namespace Service::BCAT { |
| 12 | 12 | ||
| 13 | ProgressServiceBackend::ProgressServiceBackend(std::string_view event_name) { | 13 | ProgressServiceBackend::ProgressServiceBackend(Kernel::KernelCore& kernel, |
| 14 | auto& kernel{Core::System::GetInstance().Kernel()}; | 14 | std::string_view event_name) { |
| 15 | event = Kernel::WritableEvent::CreateEventPair( | 15 | event = Kernel::WritableEvent::CreateEventPair( |
| 16 | kernel, Kernel::ResetType::Automatic, | 16 | kernel, Kernel::ResetType::Automatic, |
| 17 | std::string("ProgressServiceBackend:UpdateEvent:").append(event_name)); | 17 | std::string("ProgressServiceBackend:UpdateEvent:").append(event_name)); |
diff --git a/src/core/hle/service/bcat/backend/backend.h b/src/core/hle/service/bcat/backend/backend.h index 51dbd3316..ea4b16ad0 100644 --- a/src/core/hle/service/bcat/backend/backend.h +++ b/src/core/hle/service/bcat/backend/backend.h | |||
| @@ -15,6 +15,14 @@ | |||
| 15 | #include "core/hle/kernel/writable_event.h" | 15 | #include "core/hle/kernel/writable_event.h" |
| 16 | #include "core/hle/result.h" | 16 | #include "core/hle/result.h" |
| 17 | 17 | ||
| 18 | namespace Core { | ||
| 19 | class System; | ||
| 20 | } | ||
| 21 | |||
| 22 | namespace Kernel { | ||
| 23 | class KernelCore; | ||
| 24 | } | ||
| 25 | |||
| 18 | namespace Service::BCAT { | 26 | namespace Service::BCAT { |
| 19 | 27 | ||
| 20 | struct DeliveryCacheProgressImpl; | 28 | struct DeliveryCacheProgressImpl; |
| @@ -88,7 +96,7 @@ public: | |||
| 88 | void FinishDownload(ResultCode result); | 96 | void FinishDownload(ResultCode result); |
| 89 | 97 | ||
| 90 | private: | 98 | private: |
| 91 | explicit ProgressServiceBackend(std::string_view event_name); | 99 | explicit ProgressServiceBackend(Kernel::KernelCore& kernel, std::string_view event_name); |
| 92 | 100 | ||
| 93 | Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent() const; | 101 | Kernel::SharedPtr<Kernel::ReadableEvent> GetEvent() const; |
| 94 | DeliveryCacheProgressImpl& GetImpl(); | 102 | DeliveryCacheProgressImpl& GetImpl(); |
| @@ -145,6 +153,6 @@ public: | |||
| 145 | std::optional<std::vector<u8>> GetLaunchParameter(TitleIDVersion title) override; | 153 | std::optional<std::vector<u8>> GetLaunchParameter(TitleIDVersion title) override; |
| 146 | }; | 154 | }; |
| 147 | 155 | ||
| 148 | std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter); | 156 | std::unique_ptr<Backend> CreateBackendFromSettings(Core::System& system, DirectoryGetter getter); |
| 149 | 157 | ||
| 150 | } // namespace Service::BCAT | 158 | } // namespace Service::BCAT |
diff --git a/src/core/hle/service/bcat/backend/boxcat.cpp b/src/core/hle/service/bcat/backend/boxcat.cpp index 64022982b..918159e11 100644 --- a/src/core/hle/service/bcat/backend/boxcat.cpp +++ b/src/core/hle/service/bcat/backend/boxcat.cpp | |||
| @@ -104,14 +104,15 @@ std::string GetZIPFilePath(u64 title_id) { | |||
| 104 | 104 | ||
| 105 | // If the error is something the user should know about (build ID mismatch, bad client version), | 105 | // If the error is something the user should know about (build ID mismatch, bad client version), |
| 106 | // display an error. | 106 | // display an error. |
| 107 | void HandleDownloadDisplayResult(DownloadResult res) { | 107 | void HandleDownloadDisplayResult(const AM::Applets::AppletManager& applet_manager, |
| 108 | DownloadResult res) { | ||
| 108 | if (res == DownloadResult::Success || res == DownloadResult::NoResponse || | 109 | if (res == DownloadResult::Success || res == DownloadResult::NoResponse || |
| 109 | res == DownloadResult::GeneralWebError || res == DownloadResult::GeneralFSError || | 110 | res == DownloadResult::GeneralWebError || res == DownloadResult::GeneralFSError || |
| 110 | res == DownloadResult::NoMatchTitleId || res == DownloadResult::InvalidContentType) { | 111 | res == DownloadResult::NoMatchTitleId || res == DownloadResult::InvalidContentType) { |
| 111 | return; | 112 | return; |
| 112 | } | 113 | } |
| 113 | 114 | ||
| 114 | const auto& frontend{Core::System::GetInstance().GetAppletManager().GetAppletFrontendSet()}; | 115 | const auto& frontend{applet_manager.GetAppletFrontendSet()}; |
| 115 | frontend.error->ShowCustomErrorText( | 116 | frontend.error->ShowCustomErrorText( |
| 116 | ResultCode(-1), "There was an error while attempting to use Boxcat.", | 117 | ResultCode(-1), "There was an error while attempting to use Boxcat.", |
| 117 | DOWNLOAD_RESULT_LOG_MESSAGES[static_cast<std::size_t>(res)], [] {}); | 118 | DOWNLOAD_RESULT_LOG_MESSAGES[static_cast<std::size_t>(res)], [] {}); |
| @@ -264,12 +265,13 @@ private: | |||
| 264 | u64 build_id; | 265 | u64 build_id; |
| 265 | }; | 266 | }; |
| 266 | 267 | ||
| 267 | Boxcat::Boxcat(DirectoryGetter getter) : Backend(std::move(getter)) {} | 268 | Boxcat::Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter) |
| 269 | : Backend(std::move(getter)), applet_manager{applet_manager_} {} | ||
| 268 | 270 | ||
| 269 | Boxcat::~Boxcat() = default; | 271 | Boxcat::~Boxcat() = default; |
| 270 | 272 | ||
| 271 | void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, | 273 | void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, DirectoryGetter dir_getter, |
| 272 | ProgressServiceBackend& progress, | 274 | TitleIDVersion title, ProgressServiceBackend& progress, |
| 273 | std::optional<std::string> dir_name = {}) { | 275 | std::optional<std::string> dir_name = {}) { |
| 274 | progress.SetNeedHLELock(true); | 276 | progress.SetNeedHLELock(true); |
| 275 | 277 | ||
| @@ -295,7 +297,7 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, | |||
| 295 | FileUtil::Delete(zip_path); | 297 | FileUtil::Delete(zip_path); |
| 296 | } | 298 | } |
| 297 | 299 | ||
| 298 | HandleDownloadDisplayResult(res); | 300 | HandleDownloadDisplayResult(applet_manager, res); |
| 299 | progress.FinishDownload(ERROR_GENERAL_BCAT_FAILURE); | 301 | progress.FinishDownload(ERROR_GENERAL_BCAT_FAILURE); |
| 300 | return; | 302 | return; |
| 301 | } | 303 | } |
| @@ -364,17 +366,24 @@ void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, | |||
| 364 | 366 | ||
| 365 | bool Boxcat::Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) { | 367 | bool Boxcat::Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) { |
| 366 | is_syncing.exchange(true); | 368 | is_syncing.exchange(true); |
| 367 | std::thread([this, title, &progress] { SynchronizeInternal(dir_getter, title, progress); }) | 369 | |
| 370 | std::thread([this, title, &progress] { | ||
| 371 | SynchronizeInternal(applet_manager, dir_getter, title, progress); | ||
| 372 | }) | ||
| 368 | .detach(); | 373 | .detach(); |
| 374 | |||
| 369 | return true; | 375 | return true; |
| 370 | } | 376 | } |
| 371 | 377 | ||
| 372 | bool Boxcat::SynchronizeDirectory(TitleIDVersion title, std::string name, | 378 | bool Boxcat::SynchronizeDirectory(TitleIDVersion title, std::string name, |
| 373 | ProgressServiceBackend& progress) { | 379 | ProgressServiceBackend& progress) { |
| 374 | is_syncing.exchange(true); | 380 | is_syncing.exchange(true); |
| 375 | std::thread( | 381 | |
| 376 | [this, title, name, &progress] { SynchronizeInternal(dir_getter, title, progress, name); }) | 382 | std::thread([this, title, name, &progress] { |
| 383 | SynchronizeInternal(applet_manager, dir_getter, title, progress, name); | ||
| 384 | }) | ||
| 377 | .detach(); | 385 | .detach(); |
| 386 | |||
| 378 | return true; | 387 | return true; |
| 379 | } | 388 | } |
| 380 | 389 | ||
| @@ -420,7 +429,7 @@ std::optional<std::vector<u8>> Boxcat::GetLaunchParameter(TitleIDVersion title) | |||
| 420 | FileUtil::Delete(path); | 429 | FileUtil::Delete(path); |
| 421 | } | 430 | } |
| 422 | 431 | ||
| 423 | HandleDownloadDisplayResult(res); | 432 | HandleDownloadDisplayResult(applet_manager, res); |
| 424 | return std::nullopt; | 433 | return std::nullopt; |
| 425 | } | 434 | } |
| 426 | } | 435 | } |
diff --git a/src/core/hle/service/bcat/backend/boxcat.h b/src/core/hle/service/bcat/backend/boxcat.h index 601151189..d65b42e58 100644 --- a/src/core/hle/service/bcat/backend/boxcat.h +++ b/src/core/hle/service/bcat/backend/boxcat.h | |||
| @@ -9,6 +9,10 @@ | |||
| 9 | #include <optional> | 9 | #include <optional> |
| 10 | #include "core/hle/service/bcat/backend/backend.h" | 10 | #include "core/hle/service/bcat/backend/backend.h" |
| 11 | 11 | ||
| 12 | namespace Service::AM::Applets { | ||
| 13 | class AppletManager; | ||
| 14 | } | ||
| 15 | |||
| 12 | namespace Service::BCAT { | 16 | namespace Service::BCAT { |
| 13 | 17 | ||
| 14 | struct EventStatus { | 18 | struct EventStatus { |
| @@ -20,12 +24,13 @@ struct EventStatus { | |||
| 20 | /// Boxcat is yuzu's custom backend implementation of Nintendo's BCAT service. It is free to use and | 24 | /// Boxcat is yuzu's custom backend implementation of Nintendo's BCAT service. It is free to use and |
| 21 | /// doesn't require a switch or nintendo account. The content is controlled by the yuzu team. | 25 | /// doesn't require a switch or nintendo account. The content is controlled by the yuzu team. |
| 22 | class Boxcat final : public Backend { | 26 | class Boxcat final : public Backend { |
| 23 | friend void SynchronizeInternal(DirectoryGetter dir_getter, TitleIDVersion title, | 27 | friend void SynchronizeInternal(AM::Applets::AppletManager& applet_manager, |
| 28 | DirectoryGetter dir_getter, TitleIDVersion title, | ||
| 24 | ProgressServiceBackend& progress, | 29 | ProgressServiceBackend& progress, |
| 25 | std::optional<std::string> dir_name); | 30 | std::optional<std::string> dir_name); |
| 26 | 31 | ||
| 27 | public: | 32 | public: |
| 28 | explicit Boxcat(DirectoryGetter getter); | 33 | explicit Boxcat(AM::Applets::AppletManager& applet_manager_, DirectoryGetter getter); |
| 29 | ~Boxcat() override; | 34 | ~Boxcat() override; |
| 30 | 35 | ||
| 31 | bool Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) override; | 36 | bool Synchronize(TitleIDVersion title, ProgressServiceBackend& progress) override; |
| @@ -53,6 +58,7 @@ private: | |||
| 53 | 58 | ||
| 54 | class Client; | 59 | class Client; |
| 55 | std::unique_ptr<Client> client; | 60 | std::unique_ptr<Client> client; |
| 61 | AM::Applets::AppletManager& applet_manager; | ||
| 56 | }; | 62 | }; |
| 57 | 63 | ||
| 58 | } // namespace Service::BCAT | 64 | } // namespace Service::BCAT |
diff --git a/src/core/hle/service/bcat/module.cpp b/src/core/hle/service/bcat/module.cpp index 4e4aa758b..6d9d1527d 100644 --- a/src/core/hle/service/bcat/module.cpp +++ b/src/core/hle/service/bcat/module.cpp | |||
| @@ -125,7 +125,11 @@ private: | |||
| 125 | class IBcatService final : public ServiceFramework<IBcatService> { | 125 | class IBcatService final : public ServiceFramework<IBcatService> { |
| 126 | public: | 126 | public: |
| 127 | explicit IBcatService(Core::System& system_, Backend& backend_) | 127 | explicit IBcatService(Core::System& system_, Backend& backend_) |
| 128 | : ServiceFramework("IBcatService"), system{system_}, backend{backend_} { | 128 | : ServiceFramework("IBcatService"), system{system_}, backend{backend_}, |
| 129 | progress{{ | ||
| 130 | ProgressServiceBackend{system_.Kernel(), "Normal"}, | ||
| 131 | ProgressServiceBackend{system_.Kernel(), "Directory"}, | ||
| 132 | }} { | ||
| 129 | // clang-format off | 133 | // clang-format off |
| 130 | static const FunctionInfo functions[] = { | 134 | static const FunctionInfo functions[] = { |
| 131 | {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"}, | 135 | {10100, &IBcatService::RequestSyncDeliveryCache, "RequestSyncDeliveryCache"}, |
| @@ -249,10 +253,7 @@ private: | |||
| 249 | Core::System& system; | 253 | Core::System& system; |
| 250 | Backend& backend; | 254 | Backend& backend; |
| 251 | 255 | ||
| 252 | std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress{ | 256 | std::array<ProgressServiceBackend, static_cast<std::size_t>(SyncType::Count)> progress; |
| 253 | ProgressServiceBackend{"Normal"}, | ||
| 254 | ProgressServiceBackend{"Directory"}, | ||
| 255 | }; | ||
| 256 | }; | 257 | }; |
| 257 | 258 | ||
| 258 | void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) { | 259 | void Module::Interface::CreateBcatService(Kernel::HLERequestContext& ctx) { |
| @@ -557,12 +558,12 @@ void Module::Interface::CreateDeliveryCacheStorageServiceWithApplicationId( | |||
| 557 | rb.PushIpcInterface<IDeliveryCacheStorageService>(fsc.GetBCATDirectory(title_id)); | 558 | rb.PushIpcInterface<IDeliveryCacheStorageService>(fsc.GetBCATDirectory(title_id)); |
| 558 | } | 559 | } |
| 559 | 560 | ||
| 560 | std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) { | 561 | std::unique_ptr<Backend> CreateBackendFromSettings([[maybe_unused]] Core::System& system, |
| 561 | const auto backend = Settings::values.bcat_backend; | 562 | DirectoryGetter getter) { |
| 562 | |||
| 563 | #ifdef YUZU_ENABLE_BOXCAT | 563 | #ifdef YUZU_ENABLE_BOXCAT |
| 564 | if (backend == "boxcat") | 564 | if (Settings::values.bcat_backend == "boxcat") { |
| 565 | return std::make_unique<Boxcat>(std::move(getter)); | 565 | return std::make_unique<Boxcat>(system.GetAppletManager(), std::move(getter)); |
| 566 | } | ||
| 566 | #endif | 567 | #endif |
| 567 | 568 | ||
| 568 | return std::make_unique<NullBackend>(std::move(getter)); | 569 | return std::make_unique<NullBackend>(std::move(getter)); |
| @@ -571,7 +572,8 @@ std::unique_ptr<Backend> CreateBackendFromSettings(DirectoryGetter getter) { | |||
| 571 | Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_, | 572 | Module::Interface::Interface(Core::System& system_, std::shared_ptr<Module> module_, |
| 572 | FileSystem::FileSystemController& fsc_, const char* name) | 573 | FileSystem::FileSystemController& fsc_, const char* name) |
| 573 | : ServiceFramework(name), fsc{fsc_}, module{std::move(module_)}, | 574 | : ServiceFramework(name), fsc{fsc_}, module{std::move(module_)}, |
| 574 | backend{CreateBackendFromSettings([&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })}, | 575 | backend{CreateBackendFromSettings(system_, |
| 576 | [&fsc_](u64 tid) { return fsc_.GetBCATDirectory(tid); })}, | ||
| 575 | system{system_} {} | 577 | system{system_} {} |
| 576 | 578 | ||
| 577 | Module::Interface::~Interface() = default; | 579 | Module::Interface::~Interface() = default; |
diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index a2b25a796..81bd2f3cb 100644 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp | |||
| @@ -583,36 +583,6 @@ bool Controller_NPad::SwapNpadAssignment(u32 npad_id_1, u32 npad_id_2) { | |||
| 583 | return true; | 583 | return true; |
| 584 | } | 584 | } |
| 585 | 585 | ||
| 586 | bool Controller_NPad::IsControllerSupported(NPadControllerType controller) { | ||
| 587 | if (controller == NPadControllerType::Handheld) { | ||
| 588 | // Handheld is not even a supported type, lets stop here | ||
| 589 | if (std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), | ||
| 590 | NPAD_HANDHELD) == supported_npad_id_types.end()) { | ||
| 591 | return false; | ||
| 592 | } | ||
| 593 | // Handheld should not be supported in docked mode | ||
| 594 | if (Settings::values.use_docked_mode) { | ||
| 595 | return false; | ||
| 596 | } | ||
| 597 | } | ||
| 598 | switch (controller) { | ||
| 599 | case NPadControllerType::ProController: | ||
| 600 | return style.pro_controller; | ||
| 601 | case NPadControllerType::Handheld: | ||
| 602 | return style.handheld; | ||
| 603 | case NPadControllerType::JoyDual: | ||
| 604 | return style.joycon_dual; | ||
| 605 | case NPadControllerType::JoyLeft: | ||
| 606 | return style.joycon_left; | ||
| 607 | case NPadControllerType::JoyRight: | ||
| 608 | return style.joycon_right; | ||
| 609 | case NPadControllerType::Pokeball: | ||
| 610 | return style.pokeball; | ||
| 611 | default: | ||
| 612 | return false; | ||
| 613 | } | ||
| 614 | } | ||
| 615 | |||
| 616 | Controller_NPad::LedPattern Controller_NPad::GetLedPattern(u32 npad_id) { | 586 | Controller_NPad::LedPattern Controller_NPad::GetLedPattern(u32 npad_id) { |
| 617 | if (npad_id == npad_id_list.back() || npad_id == npad_id_list[npad_id_list.size() - 2]) { | 587 | if (npad_id == npad_id_list.back() || npad_id == npad_id_list[npad_id_list.size() - 2]) { |
| 618 | // These are controllers without led patterns | 588 | // These are controllers without led patterns |
| @@ -659,25 +629,24 @@ void Controller_NPad::ClearAllConnectedControllers() { | |||
| 659 | } | 629 | } |
| 660 | 630 | ||
| 661 | void Controller_NPad::DisconnectAllConnectedControllers() { | 631 | void Controller_NPad::DisconnectAllConnectedControllers() { |
| 662 | std::for_each(connected_controllers.begin(), connected_controllers.end(), | 632 | for (ControllerHolder& controller : connected_controllers) { |
| 663 | [](ControllerHolder& controller) { controller.is_connected = false; }); | 633 | controller.is_connected = false; |
| 634 | } | ||
| 664 | } | 635 | } |
| 665 | 636 | ||
| 666 | void Controller_NPad::ConnectAllDisconnectedControllers() { | 637 | void Controller_NPad::ConnectAllDisconnectedControllers() { |
| 667 | std::for_each(connected_controllers.begin(), connected_controllers.end(), | 638 | for (ControllerHolder& controller : connected_controllers) { |
| 668 | [](ControllerHolder& controller) { | 639 | if (controller.type != NPadControllerType::None && !controller.is_connected) { |
| 669 | if (controller.type != NPadControllerType::None && !controller.is_connected) { | 640 | controller.is_connected = true; |
| 670 | controller.is_connected = false; | 641 | } |
| 671 | } | 642 | } |
| 672 | }); | ||
| 673 | } | 643 | } |
| 674 | 644 | ||
| 675 | void Controller_NPad::ClearAllControllers() { | 645 | void Controller_NPad::ClearAllControllers() { |
| 676 | std::for_each(connected_controllers.begin(), connected_controllers.end(), | 646 | for (ControllerHolder& controller : connected_controllers) { |
| 677 | [](ControllerHolder& controller) { | 647 | controller.type = NPadControllerType::None; |
| 678 | controller.type = NPadControllerType::None; | 648 | controller.is_connected = false; |
| 679 | controller.is_connected = false; | 649 | } |
| 680 | }); | ||
| 681 | } | 650 | } |
| 682 | 651 | ||
| 683 | u32 Controller_NPad::GetAndResetPressState() { | 652 | u32 Controller_NPad::GetAndResetPressState() { |
| @@ -685,10 +654,10 @@ u32 Controller_NPad::GetAndResetPressState() { | |||
| 685 | } | 654 | } |
| 686 | 655 | ||
| 687 | bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const { | 656 | bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const { |
| 688 | const bool support_handheld = | ||
| 689 | std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), NPAD_HANDHELD) != | ||
| 690 | supported_npad_id_types.end(); | ||
| 691 | if (controller == NPadControllerType::Handheld) { | 657 | if (controller == NPadControllerType::Handheld) { |
| 658 | const bool support_handheld = | ||
| 659 | std::find(supported_npad_id_types.begin(), supported_npad_id_types.end(), | ||
| 660 | NPAD_HANDHELD) != supported_npad_id_types.end(); | ||
| 692 | // Handheld is not even a supported type, lets stop here | 661 | // Handheld is not even a supported type, lets stop here |
| 693 | if (!support_handheld) { | 662 | if (!support_handheld) { |
| 694 | return false; | 663 | return false; |
| @@ -700,6 +669,7 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const | |||
| 700 | 669 | ||
| 701 | return true; | 670 | return true; |
| 702 | } | 671 | } |
| 672 | |||
| 703 | if (std::any_of(supported_npad_id_types.begin(), supported_npad_id_types.end(), | 673 | if (std::any_of(supported_npad_id_types.begin(), supported_npad_id_types.end(), |
| 704 | [](u32 npad_id) { return npad_id <= MAX_NPAD_ID; })) { | 674 | [](u32 npad_id) { return npad_id <= MAX_NPAD_ID; })) { |
| 705 | switch (controller) { | 675 | switch (controller) { |
| @@ -717,6 +687,7 @@ bool Controller_NPad::IsControllerSupported(NPadControllerType controller) const | |||
| 717 | return false; | 687 | return false; |
| 718 | } | 688 | } |
| 719 | } | 689 | } |
| 690 | |||
| 720 | return false; | 691 | return false; |
| 721 | } | 692 | } |
| 722 | 693 | ||
| @@ -795,6 +766,7 @@ Controller_NPad::NPadControllerType Controller_NPad::DecideBestController( | |||
| 795 | priority_list.push_back(NPadControllerType::JoyLeft); | 766 | priority_list.push_back(NPadControllerType::JoyLeft); |
| 796 | priority_list.push_back(NPadControllerType::JoyRight); | 767 | priority_list.push_back(NPadControllerType::JoyRight); |
| 797 | priority_list.push_back(NPadControllerType::JoyDual); | 768 | priority_list.push_back(NPadControllerType::JoyDual); |
| 769 | break; | ||
| 798 | } | 770 | } |
| 799 | 771 | ||
| 800 | const auto iter = std::find_if(priority_list.begin(), priority_list.end(), | 772 | const auto iter = std::find_if(priority_list.begin(), priority_list.end(), |
diff --git a/src/core/hle/service/hid/controllers/npad.h b/src/core/hle/service/hid/controllers/npad.h index 1bc3d55d6..16c4caa1f 100644 --- a/src/core/hle/service/hid/controllers/npad.h +++ b/src/core/hle/service/hid/controllers/npad.h | |||
| @@ -301,6 +301,11 @@ private: | |||
| 301 | bool is_connected; | 301 | bool is_connected; |
| 302 | }; | 302 | }; |
| 303 | 303 | ||
| 304 | void InitNewlyAddedControler(std::size_t controller_idx); | ||
| 305 | bool IsControllerSupported(NPadControllerType controller) const; | ||
| 306 | NPadControllerType DecideBestController(NPadControllerType priority) const; | ||
| 307 | void RequestPadStateUpdate(u32 npad_id); | ||
| 308 | |||
| 304 | u32 press_state{}; | 309 | u32 press_state{}; |
| 305 | 310 | ||
| 306 | NPadType style{}; | 311 | NPadType style{}; |
| @@ -321,12 +326,7 @@ private: | |||
| 321 | std::array<ControllerHolder, 10> connected_controllers{}; | 326 | std::array<ControllerHolder, 10> connected_controllers{}; |
| 322 | bool can_controllers_vibrate{true}; | 327 | bool can_controllers_vibrate{true}; |
| 323 | 328 | ||
| 324 | void InitNewlyAddedControler(std::size_t controller_idx); | ||
| 325 | bool IsControllerSupported(NPadControllerType controller) const; | ||
| 326 | NPadControllerType DecideBestController(NPadControllerType priority) const; | ||
| 327 | void RequestPadStateUpdate(u32 npad_id); | ||
| 328 | std::array<ControllerPad, 10> npad_pad_states{}; | 329 | std::array<ControllerPad, 10> npad_pad_states{}; |
| 329 | bool IsControllerSupported(NPadControllerType controller); | ||
| 330 | bool is_in_lr_assignment_mode{false}; | 330 | bool is_in_lr_assignment_mode{false}; |
| 331 | Core::System& system; | 331 | Core::System& system; |
| 332 | }; | 332 | }; |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index f764388bc..3f7b8e670 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | ||
| 8 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" | 9 | #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" |
| 9 | #include "core/hle/service/nvdrv/devices/nvmap.h" | 10 | #include "core/hle/service/nvdrv/devices/nvmap.h" |
| 10 | #include "core/perf_stats.h" | 11 | #include "core/perf_stats.h" |
| @@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | |||
| 38 | transform, crop_rect}; | 39 | transform, crop_rect}; |
| 39 | 40 | ||
| 40 | system.GetPerfStats().EndGameFrame(); | 41 | system.GetPerfStats().EndGameFrame(); |
| 42 | system.GetPerfStats().EndSystemFrame(); | ||
| 41 | system.GPU().SwapBuffers(&framebuffer); | 43 | system.GPU().SwapBuffers(&framebuffer); |
| 44 | system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); | ||
| 45 | system.GetPerfStats().BeginSystemFrame(); | ||
| 42 | } | 46 | } |
| 43 | 47 | ||
| 44 | } // namespace Service::Nvidia::Devices | 48 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index eb88fee1b..b27ee0502 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp | |||
| @@ -63,16 +63,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | |||
| 63 | return NvResult::BadParameter; | 63 | return NvResult::BadParameter; |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | u32 event_id = params.value & 0x00FF; | ||
| 67 | |||
| 68 | if (event_id >= MaxNvEvents) { | ||
| 69 | std::memcpy(output.data(), ¶ms, sizeof(params)); | ||
| 70 | return NvResult::BadParameter; | ||
| 71 | } | ||
| 72 | |||
| 73 | auto event = events_interface.events[event_id]; | ||
| 66 | auto& gpu = system.GPU(); | 74 | auto& gpu = system.GPU(); |
| 67 | // This is mostly to take into account unimplemented features. As synced | 75 | // This is mostly to take into account unimplemented features. As synced |
| 68 | // gpu is always synced. | 76 | // gpu is always synced. |
| 69 | if (!gpu.IsAsync()) { | 77 | if (!gpu.IsAsync()) { |
| 78 | event.writable->Signal(); | ||
| 70 | return NvResult::Success; | 79 | return NvResult::Success; |
| 71 | } | 80 | } |
| 72 | auto lock = gpu.LockSync(); | 81 | auto lock = gpu.LockSync(); |
| 73 | const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); | 82 | const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); |
| 74 | const s32 diff = current_syncpoint_value - params.threshold; | 83 | const s32 diff = current_syncpoint_value - params.threshold; |
| 75 | if (diff >= 0) { | 84 | if (diff >= 0) { |
| 85 | event.writable->Signal(); | ||
| 76 | params.value = current_syncpoint_value; | 86 | params.value = current_syncpoint_value; |
| 77 | std::memcpy(output.data(), ¶ms, sizeof(params)); | 87 | std::memcpy(output.data(), ¶ms, sizeof(params)); |
| 78 | return NvResult::Success; | 88 | return NvResult::Success; |
| @@ -88,27 +98,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | |||
| 88 | return NvResult::Timeout; | 98 | return NvResult::Timeout; |
| 89 | } | 99 | } |
| 90 | 100 | ||
| 91 | u32 event_id; | ||
| 92 | if (is_async) { | ||
| 93 | event_id = params.value & 0x00FF; | ||
| 94 | if (event_id >= MaxNvEvents) { | ||
| 95 | std::memcpy(output.data(), ¶ms, sizeof(params)); | ||
| 96 | return NvResult::BadParameter; | ||
| 97 | } | ||
| 98 | } else { | ||
| 99 | if (ctrl.fresh_call) { | ||
| 100 | const auto result = events_interface.GetFreeEvent(); | ||
| 101 | if (result) { | ||
| 102 | event_id = *result; | ||
| 103 | } else { | ||
| 104 | LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); | ||
| 105 | event_id = params.value & 0x00FF; | ||
| 106 | } | ||
| 107 | } else { | ||
| 108 | event_id = ctrl.event_id; | ||
| 109 | } | ||
| 110 | } | ||
| 111 | |||
| 112 | EventState status = events_interface.status[event_id]; | 101 | EventState status = events_interface.status[event_id]; |
| 113 | if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { | 102 | if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { |
| 114 | events_interface.SetEventStatus(event_id, EventState::Waiting); | 103 | events_interface.SetEventStatus(event_id, EventState::Waiting); |
| @@ -120,7 +109,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>& | |||
| 120 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; | 109 | params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; |
| 121 | } | 110 | } |
| 122 | params.value |= event_id; | 111 | params.value |= event_id; |
| 123 | events_interface.events[event_id].writable->Clear(); | 112 | event.writable->Clear(); |
| 124 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); | 113 | gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); |
| 125 | if (!is_async && ctrl.fresh_call) { | 114 | if (!is_async && ctrl.fresh_call) { |
| 126 | ctrl.must_delay = true; | 115 | ctrl.must_delay = true; |
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index 5e0c23602..68d139cfb 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp | |||
| @@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { | |||
| 134 | IPC::ResponseBuilder rb{ctx, 3, 1}; | 134 | IPC::ResponseBuilder rb{ctx, 3, 1}; |
| 135 | rb.Push(RESULT_SUCCESS); | 135 | rb.Push(RESULT_SUCCESS); |
| 136 | if (event_id < MaxNvEvents) { | 136 | if (event_id < MaxNvEvents) { |
| 137 | rb.PushCopyObjects(nvdrv->GetEvent(event_id)); | 137 | auto event = nvdrv->GetEvent(event_id); |
| 138 | event->Clear(); | ||
| 139 | rb.PushCopyObjects(event); | ||
| 138 | rb.Push<u32>(NvResult::Success); | 140 | rb.Push<u32>(NvResult::Success); |
| 139 | } else { | 141 | } else { |
| 140 | rb.Push<u32>(0); | 142 | rb.Push<u32>(0); |
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 307a7e928..7bfb99e34 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp | |||
| @@ -40,8 +40,8 @@ Module::Module(Core::System& system) { | |||
| 40 | auto& kernel = system.Kernel(); | 40 | auto& kernel = system.Kernel(); |
| 41 | for (u32 i = 0; i < MaxNvEvents; i++) { | 41 | for (u32 i = 0; i < MaxNvEvents; i++) { |
| 42 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); | 42 | std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); |
| 43 | events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( | 43 | events_interface.events[i] = |
| 44 | kernel, Kernel::ResetType::Automatic, event_label); | 44 | Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label); |
| 45 | events_interface.status[i] = EventState::Free; | 45 | events_interface.status[i] = EventState::Free; |
| 46 | events_interface.registered[i] = false; | 46 | events_interface.registered[i] = false; |
| 47 | } | 47 | } |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index e1a07d3ee..55b68eb0c 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp | |||
| @@ -14,8 +14,8 @@ | |||
| 14 | 14 | ||
| 15 | namespace Service::NVFlinger { | 15 | namespace Service::NVFlinger { |
| 16 | 16 | ||
| 17 | BufferQueue::BufferQueue(u32 id, u64 layer_id) : id(id), layer_id(layer_id) { | 17 | BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id) |
| 18 | auto& kernel = Core::System::GetInstance().Kernel(); | 18 | : id(id), layer_id(layer_id) { |
| 19 | buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, | 19 | buffer_wait_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, |
| 20 | "BufferQueue NativeHandle"); | 20 | "BufferQueue NativeHandle"); |
| 21 | } | 21 | } |
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 356bedb81..8f9b18547 100644 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h | |||
| @@ -15,6 +15,10 @@ | |||
| 15 | #include "core/hle/kernel/writable_event.h" | 15 | #include "core/hle/kernel/writable_event.h" |
| 16 | #include "core/hle/service/nvdrv/nvdata.h" | 16 | #include "core/hle/service/nvdrv/nvdata.h" |
| 17 | 17 | ||
| 18 | namespace Kernel { | ||
| 19 | class KernelCore; | ||
| 20 | } | ||
| 21 | |||
| 18 | namespace Service::NVFlinger { | 22 | namespace Service::NVFlinger { |
| 19 | 23 | ||
| 20 | struct IGBPBuffer { | 24 | struct IGBPBuffer { |
| @@ -44,7 +48,7 @@ public: | |||
| 44 | NativeWindowFormat = 2, | 48 | NativeWindowFormat = 2, |
| 45 | }; | 49 | }; |
| 46 | 50 | ||
| 47 | BufferQueue(u32 id, u64 layer_id); | 51 | explicit BufferQueue(Kernel::KernelCore& kernel, u32 id, u64 layer_id); |
| 48 | ~BufferQueue(); | 52 | ~BufferQueue(); |
| 49 | 53 | ||
| 50 | enum class BufferTransformFlags : u32 { | 54 | enum class BufferTransformFlags : u32 { |
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 2e4d707b9..cc9522aad 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -83,7 +83,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) { | |||
| 83 | 83 | ||
| 84 | const u64 layer_id = next_layer_id++; | 84 | const u64 layer_id = next_layer_id++; |
| 85 | const u32 buffer_queue_id = next_buffer_queue_id++; | 85 | const u32 buffer_queue_id = next_buffer_queue_id++; |
| 86 | buffer_queues.emplace_back(buffer_queue_id, layer_id); | 86 | buffer_queues.emplace_back(system.Kernel(), buffer_queue_id, layer_id); |
| 87 | display->CreateLayer(layer_id, buffer_queues.back()); | 87 | display->CreateLayer(layer_id, buffer_queues.back()); |
| 88 | return layer_id; | 88 | return layer_id; |
| 89 | } | 89 | } |
| @@ -187,14 +187,18 @@ void NVFlinger::Compose() { | |||
| 187 | MicroProfileFlip(); | 187 | MicroProfileFlip(); |
| 188 | 188 | ||
| 189 | if (!buffer) { | 189 | if (!buffer) { |
| 190 | // There was no queued buffer to draw, render previous frame | ||
| 191 | system.GetPerfStats().EndGameFrame(); | ||
| 192 | system.GPU().SwapBuffers({}); | ||
| 193 | continue; | 190 | continue; |
| 194 | } | 191 | } |
| 195 | 192 | ||
| 196 | const auto& igbp_buffer = buffer->get().igbp_buffer; | 193 | const auto& igbp_buffer = buffer->get().igbp_buffer; |
| 197 | 194 | ||
| 195 | const auto& gpu = system.GPU(); | ||
| 196 | const auto& multi_fence = buffer->get().multi_fence; | ||
| 197 | for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { | ||
| 198 | const auto& fence = multi_fence.fences[fence_id]; | ||
| 199 | gpu.WaitFence(fence.id, fence.value); | ||
| 200 | } | ||
| 201 | |||
| 198 | // Now send the buffer to the GPU for drawing. | 202 | // Now send the buffer to the GPU for drawing. |
| 199 | // TODO(Subv): Support more than just disp0. The display device selection is probably based | 203 | // TODO(Subv): Support more than just disp0. The display device selection is probably based |
| 200 | // on which display we're drawing (Default, Internal, External, etc) | 204 | // on which display we're drawing (Default, Internal, External, etc) |
diff --git a/src/core/memory/cheat_engine.cpp b/src/core/memory/cheat_engine.cpp index b56cb0627..10821d452 100644 --- a/src/core/memory/cheat_engine.cpp +++ b/src/core/memory/cheat_engine.cpp | |||
| @@ -22,7 +22,7 @@ constexpr u32 KEYPAD_BITMASK = 0x3FFFFFF; | |||
| 22 | 22 | ||
| 23 | StandardVmCallbacks::StandardVmCallbacks(const Core::System& system, | 23 | StandardVmCallbacks::StandardVmCallbacks(const Core::System& system, |
| 24 | const CheatProcessMetadata& metadata) | 24 | const CheatProcessMetadata& metadata) |
| 25 | : system(system), metadata(metadata) {} | 25 | : metadata(metadata), system(system) {} |
| 26 | 26 | ||
| 27 | StandardVmCallbacks::~StandardVmCallbacks() = default; | 27 | StandardVmCallbacks::~StandardVmCallbacks() = default; |
| 28 | 28 | ||
| @@ -176,9 +176,8 @@ std::vector<CheatEntry> TextCheatParser::Parse(const Core::System& system, | |||
| 176 | 176 | ||
| 177 | CheatEngine::CheatEngine(Core::System& system, std::vector<CheatEntry> cheats, | 177 | CheatEngine::CheatEngine(Core::System& system, std::vector<CheatEntry> cheats, |
| 178 | const std::array<u8, 0x20>& build_id) | 178 | const std::array<u8, 0x20>& build_id) |
| 179 | : system{system}, core_timing{system.CoreTiming()}, vm{std::make_unique<StandardVmCallbacks>( | 179 | : vm{std::make_unique<StandardVmCallbacks>(system, metadata)}, |
| 180 | system, metadata)}, | 180 | cheats(std::move(cheats)), core_timing{system.CoreTiming()}, system{system} { |
| 181 | cheats(std::move(cheats)) { | ||
| 182 | metadata.main_nso_build_id = build_id; | 181 | metadata.main_nso_build_id = build_id; |
| 183 | } | 182 | } |
| 184 | 183 | ||
diff --git a/src/core/memory/dmnt_cheat_vm.cpp b/src/core/memory/dmnt_cheat_vm.cpp index cc16d15a4..4f4fa5099 100644 --- a/src/core/memory/dmnt_cheat_vm.cpp +++ b/src/core/memory/dmnt_cheat_vm.cpp | |||
| @@ -1133,8 +1133,8 @@ void DmntCheatVm::Execute(const CheatProcessMetadata& metadata) { | |||
| 1133 | case SaveRestoreRegisterOpType::ClearRegs: | 1133 | case SaveRestoreRegisterOpType::ClearRegs: |
| 1134 | case SaveRestoreRegisterOpType::Restore: | 1134 | case SaveRestoreRegisterOpType::Restore: |
| 1135 | default: | 1135 | default: |
| 1136 | src = registers.data(); | 1136 | src = saved_values.data(); |
| 1137 | dst = saved_values.data(); | 1137 | dst = registers.data(); |
| 1138 | break; | 1138 | break; |
| 1139 | } | 1139 | } |
| 1140 | for (std::size_t i = 0; i < NumRegisters; i++) { | 1140 | for (std::size_t i = 0; i < NumRegisters; i++) { |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index eaa694ff8..cb6eda1b8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -6,6 +6,7 @@ add_library(video_core STATIC | |||
| 6 | dma_pusher.h | 6 | dma_pusher.h |
| 7 | debug_utils/debug_utils.cpp | 7 | debug_utils/debug_utils.cpp |
| 8 | debug_utils/debug_utils.h | 8 | debug_utils/debug_utils.h |
| 9 | engines/const_buffer_engine_interface.h | ||
| 9 | engines/const_buffer_info.h | 10 | engines/const_buffer_info.h |
| 10 | engines/engine_upload.cpp | 11 | engines/engine_upload.cpp |
| 11 | engines/engine_upload.h | 12 | engines/engine_upload.h |
| @@ -107,10 +108,12 @@ add_library(video_core STATIC | |||
| 107 | shader/decode/other.cpp | 108 | shader/decode/other.cpp |
| 108 | shader/ast.cpp | 109 | shader/ast.cpp |
| 109 | shader/ast.h | 110 | shader/ast.h |
| 110 | shader/control_flow.cpp | ||
| 111 | shader/control_flow.h | ||
| 112 | shader/compiler_settings.cpp | 111 | shader/compiler_settings.cpp |
| 113 | shader/compiler_settings.h | 112 | shader/compiler_settings.h |
| 113 | shader/const_buffer_locker.cpp | ||
| 114 | shader/const_buffer_locker.h | ||
| 115 | shader/control_flow.cpp | ||
| 116 | shader/control_flow.h | ||
| 114 | shader/decode.cpp | 117 | shader/decode.cpp |
| 115 | shader/expr.cpp | 118 | shader/expr.cpp |
| 116 | shader/expr.h | 119 | shader/expr.h |
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h new file mode 100644 index 000000000..ac27b6cbe --- /dev/null +++ b/src/video_core/engines/const_buffer_engine_interface.h | |||
| @@ -0,0 +1,119 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <type_traits> | ||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/textures/texture.h" | ||
| 12 | |||
| 13 | namespace Tegra::Engines { | ||
| 14 | |||
| 15 | enum class ShaderType : u32 { | ||
| 16 | Vertex = 0, | ||
| 17 | TesselationControl = 1, | ||
| 18 | TesselationEval = 2, | ||
| 19 | Geometry = 3, | ||
| 20 | Fragment = 4, | ||
| 21 | Compute = 5, | ||
| 22 | }; | ||
| 23 | |||
| 24 | struct SamplerDescriptor { | ||
| 25 | union { | ||
| 26 | BitField<0, 20, Tegra::Shader::TextureType> texture_type; | ||
| 27 | BitField<20, 1, u32> is_array; | ||
| 28 | BitField<21, 1, u32> is_buffer; | ||
| 29 | BitField<22, 1, u32> is_shadow; | ||
| 30 | u32 raw{}; | ||
| 31 | }; | ||
| 32 | |||
| 33 | bool operator==(const SamplerDescriptor& rhs) const noexcept { | ||
| 34 | return raw == rhs.raw; | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const SamplerDescriptor& rhs) const noexcept { | ||
| 38 | return !operator==(rhs); | ||
| 39 | } | ||
| 40 | |||
| 41 | static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { | ||
| 42 | SamplerDescriptor result; | ||
| 43 | switch (tic_texture_type) { | ||
| 44 | case Tegra::Texture::TextureType::Texture1D: | ||
| 45 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||
| 46 | result.is_array.Assign(0); | ||
| 47 | result.is_buffer.Assign(0); | ||
| 48 | result.is_shadow.Assign(0); | ||
| 49 | return result; | ||
| 50 | case Tegra::Texture::TextureType::Texture2D: | ||
| 51 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||
| 52 | result.is_array.Assign(0); | ||
| 53 | result.is_buffer.Assign(0); | ||
| 54 | result.is_shadow.Assign(0); | ||
| 55 | return result; | ||
| 56 | case Tegra::Texture::TextureType::Texture3D: | ||
| 57 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); | ||
| 58 | result.is_array.Assign(0); | ||
| 59 | result.is_buffer.Assign(0); | ||
| 60 | result.is_shadow.Assign(0); | ||
| 61 | return result; | ||
| 62 | case Tegra::Texture::TextureType::TextureCubemap: | ||
| 63 | result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | ||
| 64 | result.is_array.Assign(0); | ||
| 65 | result.is_buffer.Assign(0); | ||
| 66 | result.is_shadow.Assign(0); | ||
| 67 | return result; | ||
| 68 | case Tegra::Texture::TextureType::Texture1DArray: | ||
| 69 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||
| 70 | result.is_array.Assign(1); | ||
| 71 | result.is_buffer.Assign(0); | ||
| 72 | result.is_shadow.Assign(0); | ||
| 73 | return result; | ||
| 74 | case Tegra::Texture::TextureType::Texture2DArray: | ||
| 75 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||
| 76 | result.is_array.Assign(1); | ||
| 77 | result.is_buffer.Assign(0); | ||
| 78 | result.is_shadow.Assign(0); | ||
| 79 | return result; | ||
| 80 | case Tegra::Texture::TextureType::Texture1DBuffer: | ||
| 81 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||
| 82 | result.is_array.Assign(0); | ||
| 83 | result.is_buffer.Assign(1); | ||
| 84 | result.is_shadow.Assign(0); | ||
| 85 | return result; | ||
| 86 | case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||
| 87 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||
| 88 | result.is_array.Assign(0); | ||
| 89 | result.is_buffer.Assign(0); | ||
| 90 | result.is_shadow.Assign(0); | ||
| 91 | return result; | ||
| 92 | case Tegra::Texture::TextureType::TextureCubeArray: | ||
| 93 | result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | ||
| 94 | result.is_array.Assign(1); | ||
| 95 | result.is_buffer.Assign(0); | ||
| 96 | result.is_shadow.Assign(0); | ||
| 97 | return result; | ||
| 98 | default: | ||
| 99 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||
| 100 | result.is_array.Assign(0); | ||
| 101 | result.is_buffer.Assign(0); | ||
| 102 | result.is_shadow.Assign(0); | ||
| 103 | return result; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | }; | ||
| 107 | static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); | ||
| 108 | |||
| 109 | class ConstBufferEngineInterface { | ||
| 110 | public: | ||
| 111 | virtual ~ConstBufferEngineInterface() = default; | ||
| 112 | virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; | ||
| 113 | virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; | ||
| 114 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 115 | u64 offset) const = 0; | ||
| 116 | virtual u32 GetBoundBuffer() const = 0; | ||
| 117 | }; | ||
| 118 | |||
| 119 | } // namespace Tegra::Engines | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 63d449135..91adef360 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -70,13 +70,31 @@ Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHan | |||
| 70 | GetTSCEntry(tex_handle.tsc_id)}; | 70 | GetTSCEntry(tex_handle.tsc_id)}; |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { | 73 | u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { |
| 74 | ASSERT(stage == ShaderType::Compute); | ||
| 74 | const auto& buffer = launch_description.const_buffer_config[const_buffer]; | 75 | const auto& buffer = launch_description.const_buffer_config[const_buffer]; |
| 75 | u32 result; | 76 | u32 result; |
| 76 | std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); | 77 | std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); |
| 77 | return result; | 78 | return result; |
| 78 | } | 79 | } |
| 79 | 80 | ||
| 81 | SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { | ||
| 82 | return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); | ||
| 83 | } | ||
| 84 | |||
| 85 | SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 86 | u64 offset) const { | ||
| 87 | ASSERT(stage == ShaderType::Compute); | ||
| 88 | const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; | ||
| 89 | const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; | ||
| 90 | |||
| 91 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||
| 92 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); | ||
| 93 | SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | ||
| 94 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | ||
| 95 | return result; | ||
| 96 | } | ||
| 97 | |||
| 80 | void KeplerCompute::ProcessLaunch() { | 98 | void KeplerCompute::ProcessLaunch() { |
| 81 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 99 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 82 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 100 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 90cf650d2..8e7182727 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 13 | #include "video_core/engines/engine_upload.h" | 14 | #include "video_core/engines/engine_upload.h" |
| 14 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 15 | #include "video_core/textures/texture.h" | 16 | #include "video_core/textures/texture.h" |
| @@ -37,7 +38,7 @@ namespace Tegra::Engines { | |||
| 37 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ | 38 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| 38 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | 39 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |
| 39 | 40 | ||
| 40 | class KeplerCompute final { | 41 | class KeplerCompute final : public ConstBufferEngineInterface { |
| 41 | public: | 42 | public: |
| 42 | explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 43 | explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 43 | MemoryManager& memory_manager); | 44 | MemoryManager& memory_manager); |
| @@ -201,7 +202,16 @@ public: | |||
| 201 | Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, | 202 | Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, |
| 202 | std::size_t offset) const; | 203 | std::size_t offset) const; |
| 203 | 204 | ||
| 204 | u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; | 205 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |
| 206 | |||
| 207 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | ||
| 208 | |||
| 209 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 210 | u64 offset) const override; | ||
| 211 | |||
| 212 | u32 GetBoundBuffer() const override { | ||
| 213 | return regs.tex_cb_index; | ||
| 214 | } | ||
| 205 | 215 | ||
| 206 | private: | 216 | private: |
| 207 | Core::System& system; | 217 | Core::System& system; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7802fd808..514ed93fa 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -98,10 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() { | |||
| 98 | mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; | 98 | mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | #define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name)) | 101 | #define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name)) |
| 102 | 102 | ||
| 103 | void Maxwell3D::InitDirtySettings() { | 103 | void Maxwell3D::InitDirtySettings() { |
| 104 | const auto set_block = [this](const u32 start, const u32 range, const u8 position) { | 104 | const auto set_block = [this](std::size_t start, std::size_t range, u8 position) { |
| 105 | const auto start_itr = dirty_pointers.begin() + start; | 105 | const auto start_itr = dirty_pointers.begin() + start; |
| 106 | const auto end_itr = start_itr + range; | 106 | const auto end_itr = start_itr + range; |
| 107 | std::fill(start_itr, end_itr, position); | 107 | std::fill(start_itr, end_itr, position); |
| @@ -112,10 +112,10 @@ void Maxwell3D::InitDirtySettings() { | |||
| 112 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); | 112 | constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); |
| 113 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); | 113 | constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); |
| 114 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; | 114 | constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; |
| 115 | u32 rt_dirty_reg = DIRTY_REGS_POS(render_target); | 115 | u8 rt_dirty_reg = DIRTY_REGS_POS(render_target); |
| 116 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { | 116 | for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { |
| 117 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); | 117 | set_block(rt_reg, registers_per_rt, rt_dirty_reg); |
| 118 | rt_dirty_reg++; | 118 | ++rt_dirty_reg; |
| 119 | } | 119 | } |
| 120 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); | 120 | constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); |
| 121 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; | 121 | dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; |
| @@ -129,35 +129,35 @@ void Maxwell3D::InitDirtySettings() { | |||
| 129 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); | 129 | constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); |
| 130 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); | 130 | constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); |
| 131 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; | 131 | constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; |
| 132 | u32 va_reg = DIRTY_REGS_POS(vertex_array); | 132 | u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array); |
| 133 | u32 vi_reg = DIRTY_REGS_POS(vertex_instance); | 133 | u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); |
| 134 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; | 134 | for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; |
| 135 | vertex_reg += vertex_array_size) { | 135 | vertex_reg += vertex_array_size) { |
| 136 | set_block(vertex_reg, 3, va_reg); | 136 | set_block(vertex_reg, 3, va_dirty_reg); |
| 137 | // The divisor concerns vertex array instances | 137 | // The divisor concerns vertex array instances |
| 138 | dirty_pointers[vertex_reg + 3] = vi_reg; | 138 | dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg; |
| 139 | va_reg++; | 139 | ++va_dirty_reg; |
| 140 | vi_reg++; | 140 | ++vi_dirty_reg; |
| 141 | } | 141 | } |
| 142 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); | 142 | constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); |
| 143 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); | 143 | constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); |
| 144 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; | 144 | constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; |
| 145 | va_reg = DIRTY_REGS_POS(vertex_array); | 145 | va_dirty_reg = DIRTY_REGS_POS(vertex_array); |
| 146 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; | 146 | for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; |
| 147 | vertex_reg += vertex_limit_size) { | 147 | vertex_reg += vertex_limit_size) { |
| 148 | set_block(vertex_reg, vertex_limit_size, va_reg); | 148 | set_block(vertex_reg, vertex_limit_size, va_dirty_reg); |
| 149 | va_reg++; | 149 | va_dirty_reg++; |
| 150 | } | 150 | } |
| 151 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); | 151 | constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); |
| 152 | constexpr u32 vertex_instance_size = | 152 | constexpr u32 vertex_instance_size = |
| 153 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); | 153 | sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); |
| 154 | constexpr u32 vertex_instance_end = | 154 | constexpr u32 vertex_instance_end = |
| 155 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; | 155 | vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; |
| 156 | vi_reg = DIRTY_REGS_POS(vertex_instance); | 156 | vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); |
| 157 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; | 157 | for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; |
| 158 | vertex_reg += vertex_instance_size) { | 158 | vertex_reg += vertex_instance_size) { |
| 159 | set_block(vertex_reg, vertex_instance_size, vi_reg); | 159 | set_block(vertex_reg, vertex_instance_size, vi_dirty_reg); |
| 160 | vi_reg++; | 160 | vi_dirty_reg++; |
| 161 | } | 161 | } |
| 162 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), | 162 | set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), |
| 163 | DIRTY_REGS_POS(vertex_attrib_format)); | 163 | DIRTY_REGS_POS(vertex_attrib_format)); |
| @@ -171,7 +171,7 @@ void Maxwell3D::InitDirtySettings() { | |||
| 171 | // State | 171 | // State |
| 172 | 172 | ||
| 173 | // Viewport | 173 | // Viewport |
| 174 | constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport); | 174 | constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport); |
| 175 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); | 175 | constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); |
| 176 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); | 176 | constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); |
| 177 | set_block(viewport_start, viewport_size, viewport_dirty_reg); | 177 | set_block(viewport_start, viewport_size, viewport_dirty_reg); |
| @@ -198,7 +198,7 @@ void Maxwell3D::InitDirtySettings() { | |||
| 198 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); | 198 | set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); |
| 199 | 199 | ||
| 200 | // Depth Test | 200 | // Depth Test |
| 201 | constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); | 201 | constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); |
| 202 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; | 202 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; |
| 203 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; | 203 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; |
| 204 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; | 204 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; |
| @@ -223,12 +223,12 @@ void Maxwell3D::InitDirtySettings() { | |||
| 223 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; | 223 | dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; |
| 224 | 224 | ||
| 225 | // Color Mask | 225 | // Color Mask |
| 226 | constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); | 226 | constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); |
| 227 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; | 227 | dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; |
| 228 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), | 228 | set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), |
| 229 | color_mask_dirty_reg); | 229 | color_mask_dirty_reg); |
| 230 | // Blend State | 230 | // Blend State |
| 231 | constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); | 231 | constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); |
| 232 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), | 232 | set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), |
| 233 | blend_state_dirty_reg); | 233 | blend_state_dirty_reg); |
| 234 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; | 234 | dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; |
| @@ -237,12 +237,12 @@ void Maxwell3D::InitDirtySettings() { | |||
| 237 | blend_state_dirty_reg); | 237 | blend_state_dirty_reg); |
| 238 | 238 | ||
| 239 | // Scissor State | 239 | // Scissor State |
| 240 | constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); | 240 | constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); |
| 241 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), | 241 | set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), |
| 242 | scissor_test_dirty_reg); | 242 | scissor_test_dirty_reg); |
| 243 | 243 | ||
| 244 | // Polygon Offset | 244 | // Polygon Offset |
| 245 | constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); | 245 | constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); |
| 246 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; | 246 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; |
| 247 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; | 247 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; |
| 248 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; | 248 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; |
| @@ -251,7 +251,7 @@ void Maxwell3D::InitDirtySettings() { | |||
| 251 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; | 251 | dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; |
| 252 | 252 | ||
| 253 | // Depth bounds | 253 | // Depth bounds |
| 254 | constexpr u32 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); | 254 | constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); |
| 255 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; | 255 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; |
| 256 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; | 256 | dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; |
| 257 | } | 257 | } |
| @@ -478,7 +478,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) { | |||
| 478 | } | 478 | } |
| 479 | 479 | ||
| 480 | void Maxwell3D::FlushMMEInlineDraw() { | 480 | void Maxwell3D::FlushMMEInlineDraw() { |
| 481 | LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), | 481 | LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()), |
| 482 | regs.vertex_buffer.count); | 482 | regs.vertex_buffer.count); |
| 483 | ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); | 483 | ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); |
| 484 | ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); | 484 | ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); |
| @@ -846,7 +846,8 @@ void Maxwell3D::ProcessClearBuffers() { | |||
| 846 | rasterizer.Clear(); | 846 | rasterizer.Clear(); |
| 847 | } | 847 | } |
| 848 | 848 | ||
| 849 | u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { | 849 | u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { |
| 850 | ASSERT(stage != ShaderType::Compute); | ||
| 850 | const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; | 851 | const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; |
| 851 | const auto& buffer = shader_stage.const_buffers[const_buffer]; | 852 | const auto& buffer = shader_stage.const_buffers[const_buffer]; |
| 852 | u32 result; | 853 | u32 result; |
| @@ -854,4 +855,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6 | |||
| 854 | return result; | 855 | return result; |
| 855 | } | 856 | } |
| 856 | 857 | ||
| 858 | SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { | ||
| 859 | return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); | ||
| 860 | } | ||
| 861 | |||
| 862 | SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 863 | u64 offset) const { | ||
| 864 | ASSERT(stage != ShaderType::Compute); | ||
| 865 | const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||
| 866 | const auto& tex_info_buffer = shader.const_buffers[const_buffer]; | ||
| 867 | const GPUVAddr tex_info_address = tex_info_buffer.address + offset; | ||
| 868 | |||
| 869 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||
| 870 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); | ||
| 871 | SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | ||
| 872 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | ||
| 873 | return result; | ||
| 874 | } | ||
| 875 | |||
| 857 | } // namespace Tegra::Engines | 876 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e3f1047d5..987ad77b2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/common_funcs.h" | 15 | #include "common/common_funcs.h" |
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/math_util.h" | 17 | #include "common/math_util.h" |
| 18 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 18 | #include "video_core/engines/const_buffer_info.h" | 19 | #include "video_core/engines/const_buffer_info.h" |
| 19 | #include "video_core/engines/engine_upload.h" | 20 | #include "video_core/engines/engine_upload.h" |
| 20 | #include "video_core/gpu.h" | 21 | #include "video_core/gpu.h" |
| @@ -44,7 +45,7 @@ namespace Tegra::Engines { | |||
| 44 | #define MAXWELL3D_REG_INDEX(field_name) \ | 45 | #define MAXWELL3D_REG_INDEX(field_name) \ |
| 45 | (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) | 46 | (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) |
| 46 | 47 | ||
| 47 | class Maxwell3D final { | 48 | class Maxwell3D final : public ConstBufferEngineInterface { |
| 48 | public: | 49 | public: |
| 49 | explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 50 | explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 50 | MemoryManager& memory_manager); | 51 | MemoryManager& memory_manager); |
| @@ -1165,6 +1166,8 @@ public: | |||
| 1165 | 1166 | ||
| 1166 | struct DirtyRegs { | 1167 | struct DirtyRegs { |
| 1167 | static constexpr std::size_t NUM_REGS = 256; | 1168 | static constexpr std::size_t NUM_REGS = 256; |
| 1169 | static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max()); | ||
| 1170 | |||
| 1168 | union { | 1171 | union { |
| 1169 | struct { | 1172 | struct { |
| 1170 | bool null_dirty; | 1173 | bool null_dirty; |
| @@ -1257,7 +1260,16 @@ public: | |||
| 1257 | /// Returns the texture information for a specific texture in a specific shader stage. | 1260 | /// Returns the texture information for a specific texture in a specific shader stage. |
| 1258 | Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; | 1261 | Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; |
| 1259 | 1262 | ||
| 1260 | u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; | 1263 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |
| 1264 | |||
| 1265 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | ||
| 1266 | |||
| 1267 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 1268 | u64 offset) const override; | ||
| 1269 | |||
| 1270 | u32 GetBoundBuffer() const override { | ||
| 1271 | return regs.tex_cb_index; | ||
| 1272 | } | ||
| 1261 | 1273 | ||
| 1262 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | 1274 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than |
| 1263 | /// we've seen used. | 1275 | /// we've seen used. |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7a6355ce2..d3d05a866 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 { | |||
| 574 | }; | 574 | }; |
| 575 | 575 | ||
| 576 | union Instruction { | 576 | union Instruction { |
| 577 | Instruction& operator=(const Instruction& instr) { | 577 | constexpr Instruction& operator=(const Instruction& instr) { |
| 578 | value = instr.value; | 578 | value = instr.value; |
| 579 | return *this; | 579 | return *this; |
| 580 | } | 580 | } |
| @@ -1760,22 +1760,22 @@ public: | |||
| 1760 | 1760 | ||
| 1761 | class Matcher { | 1761 | class Matcher { |
| 1762 | public: | 1762 | public: |
| 1763 | Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type) | 1763 | constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type) |
| 1764 | : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} | 1764 | : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {} |
| 1765 | 1765 | ||
| 1766 | const char* GetName() const { | 1766 | constexpr const char* GetName() const { |
| 1767 | return name; | 1767 | return name; |
| 1768 | } | 1768 | } |
| 1769 | 1769 | ||
| 1770 | u16 GetMask() const { | 1770 | constexpr u16 GetMask() const { |
| 1771 | return mask; | 1771 | return mask; |
| 1772 | } | 1772 | } |
| 1773 | 1773 | ||
| 1774 | Id GetId() const { | 1774 | constexpr Id GetId() const { |
| 1775 | return id; | 1775 | return id; |
| 1776 | } | 1776 | } |
| 1777 | 1777 | ||
| 1778 | Type GetType() const { | 1778 | constexpr Type GetType() const { |
| 1779 | return type; | 1779 | return type; |
| 1780 | } | 1780 | } |
| 1781 | 1781 | ||
| @@ -1784,7 +1784,7 @@ public: | |||
| 1784 | * @param instruction The instruction to test | 1784 | * @param instruction The instruction to test |
| 1785 | * @returns true if the given instruction matches. | 1785 | * @returns true if the given instruction matches. |
| 1786 | */ | 1786 | */ |
| 1787 | bool Matches(u16 instruction) const { | 1787 | constexpr bool Matches(u16 instruction) const { |
| 1788 | return (instruction & mask) == expected; | 1788 | return (instruction & mask) == expected; |
| 1789 | } | 1789 | } |
| 1790 | 1790 | ||
| @@ -1818,7 +1818,7 @@ private: | |||
| 1818 | * A '0' in a bitstring indicates that a zero must be present at that bit position. | 1818 | * A '0' in a bitstring indicates that a zero must be present at that bit position. |
| 1819 | * A '1' in a bitstring indicates that a one must be present at that bit position. | 1819 | * A '1' in a bitstring indicates that a one must be present at that bit position. |
| 1820 | */ | 1820 | */ |
| 1821 | static auto GetMaskAndExpect(const char* const bitstring) { | 1821 | static constexpr auto GetMaskAndExpect(const char* const bitstring) { |
| 1822 | u16 mask = 0, expect = 0; | 1822 | u16 mask = 0, expect = 0; |
| 1823 | for (std::size_t i = 0; i < opcode_bitsize; i++) { | 1823 | for (std::size_t i = 0; i < opcode_bitsize; i++) { |
| 1824 | const std::size_t bit_position = opcode_bitsize - i - 1; | 1824 | const std::size_t bit_position = opcode_bitsize - i - 1; |
| @@ -1835,15 +1835,15 @@ private: | |||
| 1835 | break; | 1835 | break; |
| 1836 | } | 1836 | } |
| 1837 | } | 1837 | } |
| 1838 | return std::make_tuple(mask, expect); | 1838 | return std::make_pair(mask, expect); |
| 1839 | } | 1839 | } |
| 1840 | 1840 | ||
| 1841 | public: | 1841 | public: |
| 1842 | /// Creates a matcher that can match and parse instructions based on bitstring. | 1842 | /// Creates a matcher that can match and parse instructions based on bitstring. |
| 1843 | static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type, | 1843 | static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type, |
| 1844 | const char* const name) { | 1844 | const char* const name) { |
| 1845 | const auto mask_expect = GetMaskAndExpect(bitstring); | 1845 | const auto [mask, expected] = GetMaskAndExpect(bitstring); |
| 1846 | return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type); | 1846 | return Matcher(name, mask, expected, op, type); |
| 1847 | } | 1847 | } |
| 1848 | }; | 1848 | }; |
| 1849 | 1849 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..095660115 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | ||
| 6 | #include "core/core.h" | 7 | #include "core/core.h" |
| 7 | #include "core/core_timing.h" | 8 | #include "core/core_timing.h" |
| 8 | #include "core/memory.h" | 9 | #include "core/memory.h" |
| @@ -17,6 +18,8 @@ | |||
| 17 | 18 | ||
| 18 | namespace Tegra { | 19 | namespace Tegra { |
| 19 | 20 | ||
| 21 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||
| 22 | |||
| 20 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) | 23 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) |
| 21 | : system{system}, renderer{renderer}, is_async{is_async} { | 24 | : system{system}, renderer{renderer}, is_async{is_async} { |
| 22 | auto& rasterizer{renderer.Rasterizer()}; | 25 | auto& rasterizer{renderer.Rasterizer()}; |
| @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { | |||
| 63 | return *dma_pusher; | 66 | return *dma_pusher; |
| 64 | } | 67 | } |
| 65 | 68 | ||
| 69 | void GPU::WaitFence(u32 syncpoint_id, u32 value) const { | ||
| 70 | // Synced GPU, is always in sync | ||
| 71 | if (!is_async) { | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | MICROPROFILE_SCOPE(GPU_wait); | ||
| 75 | while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 66 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { | 79 | void GPU::IncrementSyncPoint(const u32 syncpoint_id) { |
| 67 | syncpoints[syncpoint_id]++; | 80 | syncpoints[syncpoint_id]++; |
| 68 | std::lock_guard lock{sync_mutex}; | 81 | std::lock_guard lock{sync_mutex}; |
| @@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 326 | block.sequence = regs.semaphore_sequence; | 339 | block.sequence = regs.semaphore_sequence; |
| 327 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | 340 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of |
| 328 | // CoreTiming | 341 | // CoreTiming |
| 329 | block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); | 342 | block.timestamp = system.CoreTiming().GetTicks(); |
| 330 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, | 343 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, |
| 331 | sizeof(block)); | 344 | sizeof(block)); |
| 332 | } else { | 345 | } else { |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -177,6 +177,12 @@ public: | |||
| 177 | /// Returns a reference to the GPU DMA pusher. | 177 | /// Returns a reference to the GPU DMA pusher. |
| 178 | Tegra::DmaPusher& DmaPusher(); | 178 | Tegra::DmaPusher& DmaPusher(); |
| 179 | 179 | ||
| 180 | // Waits for the GPU to finish working | ||
| 181 | virtual void WaitIdle() const = 0; | ||
| 182 | |||
| 183 | /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||
| 184 | void WaitFence(u32 syncpoint_id, u32 value) const; | ||
| 185 | |||
| 180 | void IncrementSyncPoint(u32 syncpoint_id); | 186 | void IncrementSyncPoint(u32 syncpoint_id); |
| 181 | 187 | ||
| 182 | u32 GetSyncpointValue(u32 syncpoint_id) const; | 188 | u32 GetSyncpointValue(u32 syncpoint_id) const; |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con | |||
| 44 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | 44 | interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | void GPUAsynch::WaitIdle() const { | ||
| 48 | gpu_thread.WaitIdle(); | ||
| 49 | } | ||
| 50 | |||
| 47 | } // namespace VideoCommon | 51 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -25,6 +25,7 @@ public: | |||
| 25 | void FlushRegion(CacheAddr addr, u64 size) override; | 25 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 27 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 28 | void WaitIdle() const override; | ||
| 28 | 29 | ||
| 29 | protected: | 30 | protected: |
| 30 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | 31 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; |
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -24,6 +24,7 @@ public: | |||
| 24 | void FlushRegion(CacheAddr addr, u64 size) override; | 24 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | void WaitIdle() const override {} | ||
| 27 | 28 | ||
| 28 | protected: | 29 | protected: |
| 29 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, | 30 | void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -5,8 +5,6 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | ||
| 9 | #include "core/core_timing_util.h" | ||
| 10 | #include "core/frontend/scope_acquire_window_context.h" | 8 | #include "core/frontend/scope_acquire_window_context.h" |
| 11 | #include "video_core/dma_pusher.h" | 9 | #include "video_core/dma_pusher.h" |
| 12 | #include "video_core/gpu.h" | 10 | #include "video_core/gpu.h" |
| @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { | |||
| 68 | 66 | ||
| 69 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { | 67 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { |
| 70 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; | 68 | thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; |
| 71 | synchronization_event = system.CoreTiming().RegisterEvent( | ||
| 72 | "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); | ||
| 73 | } | 69 | } |
| 74 | 70 | ||
| 75 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 71 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| 76 | const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; | 72 | PushCommand(SubmitListCommand(std::move(entries))); |
| 77 | const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; | ||
| 78 | system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); | ||
| 79 | } | 73 | } |
| 80 | 74 | ||
| 81 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 75 | void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| @@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | |||
| 96 | InvalidateRegion(addr, size); | 90 | InvalidateRegion(addr, size); |
| 97 | } | 91 | } |
| 98 | 92 | ||
| 93 | void ThreadManager::WaitIdle() const { | ||
| 94 | while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { | ||
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 99 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 98 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 100 | const u64 fence{++state.last_fence}; | 99 | const u64 fence{++state.last_fence}; |
| 101 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 100 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
| 102 | return fence; | 101 | return fence; |
| 103 | } | 102 | } |
| 104 | 103 | ||
| 105 | MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); | ||
| 106 | void SynchState::WaitForSynchronization(u64 fence) { | ||
| 107 | while (signaled_fence.load() < fence) | ||
| 108 | ; | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace VideoCommon::GPUThread | 104 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -21,9 +21,6 @@ class DmaPusher; | |||
| 21 | 21 | ||
| 22 | namespace Core { | 22 | namespace Core { |
| 23 | class System; | 23 | class System; |
| 24 | namespace Timing { | ||
| 25 | struct EventType; | ||
| 26 | } // namespace Timing | ||
| 27 | } // namespace Core | 24 | } // namespace Core |
| 28 | 25 | ||
| 29 | namespace VideoCommon::GPUThread { | 26 | namespace VideoCommon::GPUThread { |
| @@ -89,8 +86,6 @@ struct CommandDataContainer { | |||
| 89 | struct SynchState final { | 86 | struct SynchState final { |
| 90 | std::atomic_bool is_running{true}; | 87 | std::atomic_bool is_running{true}; |
| 91 | 88 | ||
| 92 | void WaitForSynchronization(u64 fence); | ||
| 93 | |||
| 94 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | 89 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; |
| 95 | CommandQueue queue; | 90 | CommandQueue queue; |
| 96 | u64 last_fence{}; | 91 | u64 last_fence{}; |
| @@ -121,6 +116,9 @@ public: | |||
| 121 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 116 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 122 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size); | 117 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size); |
| 123 | 118 | ||
| 119 | // Wait until the gpu thread is idle. | ||
| 120 | void WaitIdle() const; | ||
| 121 | |||
| 124 | private: | 122 | private: |
| 125 | /// Pushes a command to be executed by the GPU thread | 123 | /// Pushes a command to be executed by the GPU thread |
| 126 | u64 PushCommand(CommandData&& command_data); | 124 | u64 PushCommand(CommandData&& command_data); |
| @@ -128,7 +126,6 @@ private: | |||
| 128 | private: | 126 | private: |
| 129 | SynchState state; | 127 | SynchState state; |
| 130 | Core::System& system; | 128 | Core::System& system; |
| 131 | Core::Timing::EventType* synchronization_event{}; | ||
| 132 | std::thread thread; | 129 | std::thread thread; |
| 133 | std::thread::id thread_id; | 130 | std::thread::id thread_id; |
| 134 | }; | 131 | }; |
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index dbaeac6db..42031d80a 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp | |||
| @@ -11,6 +11,77 @@ | |||
| 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | 11 | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); |
| 12 | 12 | ||
| 13 | namespace Tegra { | 13 | namespace Tegra { |
| 14 | namespace { | ||
| 15 | enum class Operation : u32 { | ||
| 16 | ALU = 0, | ||
| 17 | AddImmediate = 1, | ||
| 18 | ExtractInsert = 2, | ||
| 19 | ExtractShiftLeftImmediate = 3, | ||
| 20 | ExtractShiftLeftRegister = 4, | ||
| 21 | Read = 5, | ||
| 22 | Unused = 6, // This operation doesn't seem to be a valid encoding. | ||
| 23 | Branch = 7, | ||
| 24 | }; | ||
| 25 | } // Anonymous namespace | ||
| 26 | |||
| 27 | enum class MacroInterpreter::ALUOperation : u32 { | ||
| 28 | Add = 0, | ||
| 29 | AddWithCarry = 1, | ||
| 30 | Subtract = 2, | ||
| 31 | SubtractWithBorrow = 3, | ||
| 32 | // Operations 4-7 don't seem to be valid encodings. | ||
| 33 | Xor = 8, | ||
| 34 | Or = 9, | ||
| 35 | And = 10, | ||
| 36 | AndNot = 11, | ||
| 37 | Nand = 12 | ||
| 38 | }; | ||
| 39 | |||
| 40 | enum class MacroInterpreter::ResultOperation : u32 { | ||
| 41 | IgnoreAndFetch = 0, | ||
| 42 | Move = 1, | ||
| 43 | MoveAndSetMethod = 2, | ||
| 44 | FetchAndSend = 3, | ||
| 45 | MoveAndSend = 4, | ||
| 46 | FetchAndSetMethod = 5, | ||
| 47 | MoveAndSetMethodFetchAndSend = 6, | ||
| 48 | MoveAndSetMethodSend = 7 | ||
| 49 | }; | ||
| 50 | |||
| 51 | enum class MacroInterpreter::BranchCondition : u32 { | ||
| 52 | Zero = 0, | ||
| 53 | NotZero = 1, | ||
| 54 | }; | ||
| 55 | |||
| 56 | union MacroInterpreter::Opcode { | ||
| 57 | u32 raw; | ||
| 58 | BitField<0, 3, Operation> operation; | ||
| 59 | BitField<4, 3, ResultOperation> result_operation; | ||
| 60 | BitField<4, 1, BranchCondition> branch_condition; | ||
| 61 | // If set on a branch, then the branch doesn't have a delay slot. | ||
| 62 | BitField<5, 1, u32> branch_annul; | ||
| 63 | BitField<7, 1, u32> is_exit; | ||
| 64 | BitField<8, 3, u32> dst; | ||
| 65 | BitField<11, 3, u32> src_a; | ||
| 66 | BitField<14, 3, u32> src_b; | ||
| 67 | // The signed immediate overlaps the second source operand and the alu operation. | ||
| 68 | BitField<14, 18, s32> immediate; | ||
| 69 | |||
| 70 | BitField<17, 5, ALUOperation> alu_operation; | ||
| 71 | |||
| 72 | // Bitfield instructions data | ||
| 73 | BitField<17, 5, u32> bf_src_bit; | ||
| 74 | BitField<22, 5, u32> bf_size; | ||
| 75 | BitField<27, 5, u32> bf_dst_bit; | ||
| 76 | |||
| 77 | u32 GetBitfieldMask() const { | ||
| 78 | return (1 << bf_size) - 1; | ||
| 79 | } | ||
| 80 | |||
| 81 | s32 GetBranchTarget() const { | ||
| 82 | return static_cast<s32>(immediate * sizeof(u32)); | ||
| 83 | } | ||
| 84 | }; | ||
| 14 | 85 | ||
| 15 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | 86 | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} |
| 16 | 87 | ||
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h index 76b6a895b..631146d89 100644 --- a/src/video_core/macro_interpreter.h +++ b/src/video_core/macro_interpreter.h | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <vector> | ||
| 10 | 9 | ||
| 11 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 12 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| @@ -28,75 +27,11 @@ public: | |||
| 28 | void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); | 27 | void Execute(u32 offset, std::size_t num_parameters, const u32* parameters); |
| 29 | 28 | ||
| 30 | private: | 29 | private: |
| 31 | enum class Operation : u32 { | 30 | enum class ALUOperation : u32; |
| 32 | ALU = 0, | 31 | enum class BranchCondition : u32; |
| 33 | AddImmediate = 1, | 32 | enum class ResultOperation : u32; |
| 34 | ExtractInsert = 2, | ||
| 35 | ExtractShiftLeftImmediate = 3, | ||
| 36 | ExtractShiftLeftRegister = 4, | ||
| 37 | Read = 5, | ||
| 38 | Unused = 6, // This operation doesn't seem to be a valid encoding. | ||
| 39 | Branch = 7, | ||
| 40 | }; | ||
| 41 | |||
| 42 | enum class ALUOperation : u32 { | ||
| 43 | Add = 0, | ||
| 44 | AddWithCarry = 1, | ||
| 45 | Subtract = 2, | ||
| 46 | SubtractWithBorrow = 3, | ||
| 47 | // Operations 4-7 don't seem to be valid encodings. | ||
| 48 | Xor = 8, | ||
| 49 | Or = 9, | ||
| 50 | And = 10, | ||
| 51 | AndNot = 11, | ||
| 52 | Nand = 12 | ||
| 53 | }; | ||
| 54 | |||
| 55 | enum class ResultOperation : u32 { | ||
| 56 | IgnoreAndFetch = 0, | ||
| 57 | Move = 1, | ||
| 58 | MoveAndSetMethod = 2, | ||
| 59 | FetchAndSend = 3, | ||
| 60 | MoveAndSend = 4, | ||
| 61 | FetchAndSetMethod = 5, | ||
| 62 | MoveAndSetMethodFetchAndSend = 6, | ||
| 63 | MoveAndSetMethodSend = 7 | ||
| 64 | }; | ||
| 65 | 33 | ||
| 66 | enum class BranchCondition : u32 { | 34 | union Opcode; |
| 67 | Zero = 0, | ||
| 68 | NotZero = 1, | ||
| 69 | }; | ||
| 70 | |||
| 71 | union Opcode { | ||
| 72 | u32 raw; | ||
| 73 | BitField<0, 3, Operation> operation; | ||
| 74 | BitField<4, 3, ResultOperation> result_operation; | ||
| 75 | BitField<4, 1, BranchCondition> branch_condition; | ||
| 76 | BitField<5, 1, u32> | ||
| 77 | branch_annul; // If set on a branch, then the branch doesn't have a delay slot. | ||
| 78 | BitField<7, 1, u32> is_exit; | ||
| 79 | BitField<8, 3, u32> dst; | ||
| 80 | BitField<11, 3, u32> src_a; | ||
| 81 | BitField<14, 3, u32> src_b; | ||
| 82 | // The signed immediate overlaps the second source operand and the alu operation. | ||
| 83 | BitField<14, 18, s32> immediate; | ||
| 84 | |||
| 85 | BitField<17, 5, ALUOperation> alu_operation; | ||
| 86 | |||
| 87 | // Bitfield instructions data | ||
| 88 | BitField<17, 5, u32> bf_src_bit; | ||
| 89 | BitField<22, 5, u32> bf_size; | ||
| 90 | BitField<27, 5, u32> bf_dst_bit; | ||
| 91 | |||
| 92 | u32 GetBitfieldMask() const { | ||
| 93 | return (1 << bf_size) - 1; | ||
| 94 | } | ||
| 95 | |||
| 96 | s32 GetBranchTarget() const { | ||
| 97 | return static_cast<s32>(immediate * sizeof(u32)); | ||
| 98 | } | ||
| 99 | }; | ||
| 100 | 35 | ||
| 101 | union MethodAddress { | 36 | union MethodAddress { |
| 102 | u32 raw; | 37 | u32 raw; |
| @@ -149,9 +84,10 @@ private: | |||
| 149 | 84 | ||
| 150 | Engines::Maxwell3D& maxwell3d; | 85 | Engines::Maxwell3D& maxwell3d; |
| 151 | 86 | ||
| 152 | u32 pc; ///< Current program counter | 87 | /// Current program counter |
| 153 | std::optional<u32> | 88 | u32 pc; |
| 154 | delayed_pc; ///< Program counter to execute at after the delay slot is executed. | 89 | /// Program counter to execute at after the delay slot is executed. |
| 90 | std::optional<u32> delayed_pc; | ||
| 155 | 91 | ||
| 156 | static constexpr std::size_t NumMacroRegisters = 8; | 92 | static constexpr std::size_t NumMacroRegisters = 8; |
| 157 | 93 | ||
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index ab71870ab..fe5f08ace 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp | |||
| @@ -93,6 +93,7 @@ static constexpr ConversionArray morton_to_linear_fns = { | |||
| 93 | MortonCopy<true, PixelFormat::DXT23_SRGB>, | 93 | MortonCopy<true, PixelFormat::DXT23_SRGB>, |
| 94 | MortonCopy<true, PixelFormat::DXT45_SRGB>, | 94 | MortonCopy<true, PixelFormat::DXT45_SRGB>, |
| 95 | MortonCopy<true, PixelFormat::BC7U_SRGB>, | 95 | MortonCopy<true, PixelFormat::BC7U_SRGB>, |
| 96 | MortonCopy<true, PixelFormat::R4G4B4A4U>, | ||
| 96 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, | 97 | MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>, |
| 97 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, | 98 | MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>, |
| 98 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, | 99 | MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>, |
| @@ -101,6 +102,16 @@ static constexpr ConversionArray morton_to_linear_fns = { | |||
| 101 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, | 102 | MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>, |
| 102 | MortonCopy<true, PixelFormat::ASTC_2D_10X8>, | 103 | MortonCopy<true, PixelFormat::ASTC_2D_10X8>, |
| 103 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, | 104 | MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>, |
| 105 | MortonCopy<true, PixelFormat::ASTC_2D_6X6>, | ||
| 106 | MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>, | ||
| 107 | MortonCopy<true, PixelFormat::ASTC_2D_10X10>, | ||
| 108 | MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>, | ||
| 109 | MortonCopy<true, PixelFormat::ASTC_2D_12X12>, | ||
| 110 | MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>, | ||
| 111 | MortonCopy<true, PixelFormat::ASTC_2D_8X6>, | ||
| 112 | MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>, | ||
| 113 | MortonCopy<true, PixelFormat::ASTC_2D_6X5>, | ||
| 114 | MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>, | ||
| 104 | MortonCopy<true, PixelFormat::Z32F>, | 115 | MortonCopy<true, PixelFormat::Z32F>, |
| 105 | MortonCopy<true, PixelFormat::Z16>, | 116 | MortonCopy<true, PixelFormat::Z16>, |
| 106 | MortonCopy<true, PixelFormat::Z24S8>, | 117 | MortonCopy<true, PixelFormat::Z24S8>, |
| @@ -162,6 +173,17 @@ static constexpr ConversionArray linear_to_morton_fns = { | |||
| 162 | MortonCopy<false, PixelFormat::DXT23_SRGB>, | 173 | MortonCopy<false, PixelFormat::DXT23_SRGB>, |
| 163 | MortonCopy<false, PixelFormat::DXT45_SRGB>, | 174 | MortonCopy<false, PixelFormat::DXT45_SRGB>, |
| 164 | MortonCopy<false, PixelFormat::BC7U_SRGB>, | 175 | MortonCopy<false, PixelFormat::BC7U_SRGB>, |
| 176 | MortonCopy<false, PixelFormat::R4G4B4A4U>, | ||
| 177 | nullptr, | ||
| 178 | nullptr, | ||
| 179 | nullptr, | ||
| 180 | nullptr, | ||
| 181 | nullptr, | ||
| 182 | nullptr, | ||
| 183 | nullptr, | ||
| 184 | nullptr, | ||
| 185 | nullptr, | ||
| 186 | nullptr, | ||
| 165 | nullptr, | 187 | nullptr, |
| 166 | nullptr, | 188 | nullptr, |
| 167 | nullptr, | 189 | nullptr, |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a85f730a8..9431d64ac 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | |||
| 348 | } | 348 | } |
| 349 | 349 | ||
| 350 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | 350 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { |
| 351 | std::lock_guard lock{pages_mutex}; | ||
| 351 | const u64 page_start{addr >> Memory::PAGE_BITS}; | 352 | const u64 page_start{addr >> Memory::PAGE_BITS}; |
| 352 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; | 353 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; |
| 353 | 354 | ||
| @@ -974,7 +975,8 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag | |||
| 974 | } | 975 | } |
| 975 | const auto cbuf = entry.GetBindlessCBuf(); | 976 | const auto cbuf = entry.GetBindlessCBuf(); |
| 976 | Tegra::Texture::TextureHandle tex_handle; | 977 | Tegra::Texture::TextureHandle tex_handle; |
| 977 | tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); | 978 | Tegra::Engines::ShaderType shader_type = static_cast<Tegra::Engines::ShaderType>(stage); |
| 979 | tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second); | ||
| 978 | return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); | 980 | return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); |
| 979 | }(); | 981 | }(); |
| 980 | 982 | ||
| @@ -1004,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) | |||
| 1004 | } | 1006 | } |
| 1005 | const auto cbuf = entry.GetBindlessCBuf(); | 1007 | const auto cbuf = entry.GetBindlessCBuf(); |
| 1006 | Tegra::Texture::TextureHandle tex_handle; | 1008 | Tegra::Texture::TextureHandle tex_handle; |
| 1007 | tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); | 1009 | tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, |
| 1010 | cbuf.first, cbuf.second); | ||
| 1008 | return compute.GetTextureInfo(tex_handle, entry.GetOffset()); | 1011 | return compute.GetTextureInfo(tex_handle, entry.GetOffset()); |
| 1009 | }(); | 1012 | }(); |
| 1010 | 1013 | ||
| @@ -1049,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { | |||
| 1049 | } | 1052 | } |
| 1050 | const auto cbuf = entry.GetBindlessCBuf(); | 1053 | const auto cbuf = entry.GetBindlessCBuf(); |
| 1051 | Tegra::Texture::TextureHandle tex_handle; | 1054 | Tegra::Texture::TextureHandle tex_handle; |
| 1052 | tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); | 1055 | tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, |
| 1056 | cbuf.first, cbuf.second); | ||
| 1053 | return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; | 1057 | return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; |
| 1054 | }(); | 1058 | }(); |
| 1055 | SetupImage(bindpoint, tic, entry); | 1059 | SetupImage(bindpoint, tic, entry); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include <cstddef> | 9 | #include <cstddef> |
| 10 | #include <map> | 10 | #include <map> |
| 11 | #include <memory> | 11 | #include <memory> |
| 12 | #include <mutex> | ||
| 12 | #include <optional> | 13 | #include <optional> |
| 13 | #include <tuple> | 14 | #include <tuple> |
| 14 | #include <utility> | 15 | #include <utility> |
| @@ -230,6 +231,8 @@ private: | |||
| 230 | 231 | ||
| 231 | using CachedPageMap = boost::icl::interval_map<u64, int>; | 232 | using CachedPageMap = boost::icl::interval_map<u64, int>; |
| 232 | CachedPageMap cached_pages; | 233 | CachedPageMap cached_pages; |
| 234 | |||
| 235 | std::mutex pages_mutex; | ||
| 233 | }; | 236 | }; |
| 234 | 237 | ||
| 235 | } // namespace OpenGL | 238 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 42ca3b1bd..f1b89165d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -3,13 +3,16 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <mutex> | 5 | #include <mutex> |
| 6 | #include <optional> | ||
| 7 | #include <string> | ||
| 6 | #include <thread> | 8 | #include <thread> |
| 9 | #include <unordered_set> | ||
| 7 | #include <boost/functional/hash.hpp> | 10 | #include <boost/functional/hash.hpp> |
| 8 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 9 | #include "common/hash.h" | ||
| 10 | #include "common/scope_exit.h" | 12 | #include "common/scope_exit.h" |
| 11 | #include "core/core.h" | 13 | #include "core/core.h" |
| 12 | #include "core/frontend/emu_window.h" | 14 | #include "core/frontend/emu_window.h" |
| 15 | #include "video_core/engines/kepler_compute.h" | ||
| 13 | #include "video_core/engines/maxwell_3d.h" | 16 | #include "video_core/engines/maxwell_3d.h" |
| 14 | #include "video_core/memory_manager.h" | 17 | #include "video_core/memory_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_rasterizer.h" | 18 | #include "video_core/renderer_opengl/gl_rasterizer.h" |
| @@ -21,18 +24,20 @@ | |||
| 21 | 24 | ||
| 22 | namespace OpenGL { | 25 | namespace OpenGL { |
| 23 | 26 | ||
| 27 | using Tegra::Engines::ShaderType; | ||
| 28 | using VideoCommon::Shader::ConstBufferLocker; | ||
| 24 | using VideoCommon::Shader::ProgramCode; | 29 | using VideoCommon::Shader::ProgramCode; |
| 30 | using VideoCommon::Shader::ShaderIR; | ||
| 31 | |||
| 32 | namespace { | ||
| 25 | 33 | ||
| 26 | // One UBO is always reserved for emulation values on staged shaders | 34 | // One UBO is always reserved for emulation values on staged shaders |
| 27 | constexpr u32 STAGE_RESERVED_UBOS = 1; | 35 | constexpr u32 STAGE_RESERVED_UBOS = 1; |
| 28 | 36 | ||
| 29 | struct UnspecializedShader { | 37 | constexpr u32 STAGE_MAIN_OFFSET = 10; |
| 30 | std::string code; | 38 | constexpr u32 KERNEL_MAIN_OFFSET = 0; |
| 31 | GLShader::ShaderEntries entries; | ||
| 32 | ProgramType program_type; | ||
| 33 | }; | ||
| 34 | 39 | ||
| 35 | namespace { | 40 | constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; |
| 36 | 41 | ||
| 37 | /// Gets the address for the specified shader stage program | 42 | /// Gets the address for the specified shader stage program |
| 38 | GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { | 43 | GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { |
| @@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) | |||
| 41 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; | 46 | return gpu.regs.code_address.CodeAddress() + shader_config.offset; |
| 42 | } | 47 | } |
| 43 | 48 | ||
| 49 | /// Gets if the current instruction offset is a scheduler instruction | ||
| 50 | constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||
| 51 | // Sched instructions appear once every 4 instructions. | ||
| 52 | constexpr std::size_t SchedPeriod = 4; | ||
| 53 | const std::size_t absolute_offset = offset - main_offset; | ||
| 54 | return (absolute_offset % SchedPeriod) == 0; | ||
| 55 | } | ||
| 56 | |||
| 57 | /// Calculates the size of a program stream | ||
| 58 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | ||
| 59 | constexpr std::size_t start_offset = 10; | ||
| 60 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 61 | // shaders end with one. | ||
| 62 | constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; | ||
| 63 | constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; | ||
| 64 | std::size_t offset = start_offset; | ||
| 65 | while (offset < program.size()) { | ||
| 66 | const u64 instruction = program[offset]; | ||
| 67 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 68 | if ((instruction & mask) == self_jumping_branch) { | ||
| 69 | // End on Maxwell's "nop" instruction | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | if (instruction == 0) { | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | offset++; | ||
| 77 | } | ||
| 78 | // The last instruction is included in the program size | ||
| 79 | return std::min(offset + 1, program.size()); | ||
| 80 | } | ||
| 81 | |||
| 44 | /// Gets the shader program code from memory for the specified address | 82 | /// Gets the shader program code from memory for the specified address |
| 45 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, | 83 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, |
| 46 | const u8* host_ptr) { | 84 | const u8* host_ptr) { |
| @@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| 51 | }); | 89 | }); |
| 52 | memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), | 90 | memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), |
| 53 | program_code.size() * sizeof(u64)); | 91 | program_code.size() * sizeof(u64)); |
| 92 | program_code.resize(CalculateProgramSize(program_code)); | ||
| 54 | return program_code; | 93 | return program_code; |
| 55 | } | 94 | } |
| 56 | 95 | ||
| @@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) { | |||
| 71 | } | 110 | } |
| 72 | } | 111 | } |
| 73 | 112 | ||
| 74 | /// Gets if the current instruction offset is a scheduler instruction | ||
| 75 | constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||
| 76 | // Sched instructions appear once every 4 instructions. | ||
| 77 | constexpr std::size_t SchedPeriod = 4; | ||
| 78 | const std::size_t absolute_offset = offset - main_offset; | ||
| 79 | return (absolute_offset % SchedPeriod) == 0; | ||
| 80 | } | ||
| 81 | |||
| 82 | /// Describes primitive behavior on geometry shaders | 113 | /// Describes primitive behavior on geometry shaders |
| 83 | constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | 114 | constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { |
| 84 | switch (primitive_mode) { | 115 | switch (primitive_mode) { |
| @@ -121,110 +152,142 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) { | |||
| 121 | return {}; | 152 | return {}; |
| 122 | } | 153 | } |
| 123 | 154 | ||
| 124 | /// Calculates the size of a program stream | ||
| 125 | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | ||
| 126 | constexpr std::size_t start_offset = 10; | ||
| 127 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 128 | // shaders end with one. | ||
| 129 | constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; | ||
| 130 | constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; | ||
| 131 | std::size_t offset = start_offset; | ||
| 132 | std::size_t size = start_offset * sizeof(u64); | ||
| 133 | while (offset < program.size()) { | ||
| 134 | const u64 instruction = program[offset]; | ||
| 135 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 136 | if ((instruction & mask) == self_jumping_branch) { | ||
| 137 | // End on Maxwell's "nop" instruction | ||
| 138 | break; | ||
| 139 | } | ||
| 140 | if (instruction == 0) { | ||
| 141 | break; | ||
| 142 | } | ||
| 143 | } | ||
| 144 | size += sizeof(u64); | ||
| 145 | offset++; | ||
| 146 | } | ||
| 147 | // The last instruction is included in the program size | ||
| 148 | return std::min(size + sizeof(u64), program.size() * sizeof(u64)); | ||
| 149 | } | ||
| 150 | |||
| 151 | /// Hashes one (or two) program streams | 155 | /// Hashes one (or two) program streams |
| 152 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, | 156 | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, |
| 153 | const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { | 157 | const ProgramCode& code_b) { |
| 154 | if (size_a == 0) { | 158 | u64 unique_identifier = boost::hash_value(code); |
| 155 | size_a = CalculateProgramSize(code); | 159 | if (program_type == ProgramType::VertexA) { |
| 156 | } | 160 | // VertexA programs include two programs |
| 157 | u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | 161 | boost::hash_combine(unique_identifier, boost::hash_value(code_b)); |
| 158 | if (program_type != ProgramType::VertexA) { | ||
| 159 | return unique_identifier; | ||
| 160 | } | ||
| 161 | // VertexA programs include two programs | ||
| 162 | |||
| 163 | std::size_t seed = 0; | ||
| 164 | boost::hash_combine(seed, unique_identifier); | ||
| 165 | |||
| 166 | if (size_b == 0) { | ||
| 167 | size_b = CalculateProgramSize(code_b); | ||
| 168 | } | 162 | } |
| 169 | const u64 identifier_b = | 163 | return unique_identifier; |
| 170 | Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); | ||
| 171 | boost::hash_combine(seed, identifier_b); | ||
| 172 | return static_cast<u64>(seed); | ||
| 173 | } | 164 | } |
| 174 | 165 | ||
| 175 | /// Creates an unspecialized program from code streams | 166 | /// Creates an unspecialized program from code streams |
| 176 | GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, | 167 | std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, |
| 177 | ProgramCode program_code, ProgramCode program_code_b) { | 168 | const std::optional<ShaderIR>& ir_b) { |
| 178 | GLShader::ShaderSetup setup(program_code); | ||
| 179 | setup.program.size_a = CalculateProgramSize(program_code); | ||
| 180 | setup.program.size_b = 0; | ||
| 181 | if (program_type == ProgramType::VertexA) { | ||
| 182 | // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. | ||
| 183 | // Conventional HW does not support this, so we combine VertexA and VertexB into one | ||
| 184 | // stage here. | ||
| 185 | setup.SetProgramB(program_code_b); | ||
| 186 | setup.program.size_b = CalculateProgramSize(program_code_b); | ||
| 187 | } | ||
| 188 | setup.program.unique_identifier = GetUniqueIdentifier( | ||
| 189 | program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); | ||
| 190 | |||
| 191 | switch (program_type) { | 169 | switch (program_type) { |
| 192 | case ProgramType::VertexA: | 170 | case ProgramType::VertexA: |
| 193 | case ProgramType::VertexB: | 171 | case ProgramType::VertexB: |
| 194 | return GLShader::GenerateVertexShader(device, setup); | 172 | return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); |
| 195 | case ProgramType::Geometry: | 173 | case ProgramType::Geometry: |
| 196 | return GLShader::GenerateGeometryShader(device, setup); | 174 | return GLShader::GenerateGeometryShader(device, ir); |
| 197 | case ProgramType::Fragment: | 175 | case ProgramType::Fragment: |
| 198 | return GLShader::GenerateFragmentShader(device, setup); | 176 | return GLShader::GenerateFragmentShader(device, ir); |
| 199 | case ProgramType::Compute: | 177 | case ProgramType::Compute: |
| 200 | return GLShader::GenerateComputeShader(device, setup); | 178 | return GLShader::GenerateComputeShader(device, ir); |
| 201 | default: | 179 | default: |
| 202 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); | 180 | UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |
| 203 | return {}; | 181 | return {}; |
| 204 | } | 182 | } |
| 205 | } | 183 | } |
| 206 | 184 | ||
| 207 | CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | 185 | constexpr const char* GetProgramTypeName(ProgramType program_type) { |
| 208 | ProgramType program_type, const ProgramVariant& variant, | 186 | switch (program_type) { |
| 209 | bool hint_retrievable = false) { | 187 | case ProgramType::VertexA: |
| 188 | case ProgramType::VertexB: | ||
| 189 | return "VS"; | ||
| 190 | case ProgramType::TessellationControl: | ||
| 191 | return "TCS"; | ||
| 192 | case ProgramType::TessellationEval: | ||
| 193 | return "TES"; | ||
| 194 | case ProgramType::Geometry: | ||
| 195 | return "GS"; | ||
| 196 | case ProgramType::Fragment: | ||
| 197 | return "FS"; | ||
| 198 | case ProgramType::Compute: | ||
| 199 | return "CS"; | ||
| 200 | } | ||
| 201 | return "UNK"; | ||
| 202 | } | ||
| 203 | |||
| 204 | Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { | ||
| 205 | switch (program_type) { | ||
| 206 | case ProgramType::VertexA: | ||
| 207 | case ProgramType::VertexB: | ||
| 208 | return Tegra::Engines::ShaderType::Vertex; | ||
| 209 | case ProgramType::TessellationControl: | ||
| 210 | return Tegra::Engines::ShaderType::TesselationControl; | ||
| 211 | case ProgramType::TessellationEval: | ||
| 212 | return Tegra::Engines::ShaderType::TesselationEval; | ||
| 213 | case ProgramType::Geometry: | ||
| 214 | return Tegra::Engines::ShaderType::Geometry; | ||
| 215 | case ProgramType::Fragment: | ||
| 216 | return Tegra::Engines::ShaderType::Fragment; | ||
| 217 | case ProgramType::Compute: | ||
| 218 | return Tegra::Engines::ShaderType::Compute; | ||
| 219 | } | ||
| 220 | UNREACHABLE(); | ||
| 221 | return {}; | ||
| 222 | } | ||
| 223 | |||
| 224 | std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { | ||
| 225 | return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); | ||
| 226 | } | ||
| 227 | |||
| 228 | Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( | ||
| 229 | Core::System& system, ProgramType program_type) { | ||
| 230 | if (program_type == ProgramType::Compute) { | ||
| 231 | return system.GPU().KeplerCompute(); | ||
| 232 | } else { | ||
| 233 | return system.GPU().Maxwell3D(); | ||
| 234 | } | ||
| 235 | } | ||
| 236 | |||
| 237 | std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) { | ||
| 238 | return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type), | ||
| 239 | GetConstBufferEngineInterface(system, program_type)); | ||
| 240 | } | ||
| 241 | |||
| 242 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | ||
| 243 | for (const auto& key : usage.keys) { | ||
| 244 | const auto [buffer, offset] = key.first; | ||
| 245 | locker.InsertKey(buffer, offset, key.second); | ||
| 246 | } | ||
| 247 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 248 | locker.InsertBoundSampler(offset, sampler); | ||
| 249 | } | ||
| 250 | for (const auto& [key, sampler] : usage.bindless_samplers) { | ||
| 251 | const auto [buffer, offset] = key; | ||
| 252 | locker.InsertBindlessSampler(buffer, offset, sampler); | ||
| 253 | } | ||
| 254 | } | ||
| 255 | |||
| 256 | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, | ||
| 257 | const ProgramCode& program_code, const ProgramCode& program_code_b, | ||
| 258 | const ProgramVariant& variant, ConstBufferLocker& locker, | ||
| 259 | bool hint_retrievable = false) { | ||
| 260 | LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); | ||
| 261 | |||
| 262 | const bool is_compute = program_type == ProgramType::Compute; | ||
| 263 | const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 264 | const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); | ||
| 265 | std::optional<ShaderIR> ir_b; | ||
| 266 | if (!program_code_b.empty()) { | ||
| 267 | ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); | ||
| 268 | } | ||
| 269 | const auto entries = GLShader::GetEntries(ir); | ||
| 270 | |||
| 210 | auto base_bindings{variant.base_bindings}; | 271 | auto base_bindings{variant.base_bindings}; |
| 211 | const auto primitive_mode{variant.primitive_mode}; | 272 | const auto primitive_mode{variant.primitive_mode}; |
| 212 | const auto texture_buffer_usage{variant.texture_buffer_usage}; | 273 | const auto texture_buffer_usage{variant.texture_buffer_usage}; |
| 213 | 274 | ||
| 214 | std::string source = R"(#version 430 core | 275 | std::string source = fmt::format(R"(// {} |
| 276 | #version 430 core | ||
| 215 | #extension GL_ARB_separate_shader_objects : enable | 277 | #extension GL_ARB_separate_shader_objects : enable |
| 216 | #extension GL_ARB_shader_viewport_layer_array : enable | 278 | #extension GL_ARB_shader_viewport_layer_array : enable |
| 217 | #extension GL_EXT_shader_image_load_formatted : enable | 279 | #extension GL_EXT_shader_image_load_formatted : enable |
| 218 | #extension GL_NV_gpu_shader5 : enable | 280 | #extension GL_NV_gpu_shader5 : enable |
| 219 | #extension GL_NV_shader_thread_group : enable | 281 | #extension GL_NV_shader_thread_group : enable |
| 220 | #extension GL_NV_shader_thread_shuffle : enable | 282 | #extension GL_NV_shader_thread_shuffle : enable |
| 221 | )"; | 283 | )", |
| 222 | if (program_type == ProgramType::Compute) { | 284 | GetShaderId(unique_identifier, program_type)); |
| 285 | if (is_compute) { | ||
| 223 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; | 286 | source += "#extension GL_ARB_compute_variable_group_size : require\n"; |
| 224 | } | 287 | } |
| 225 | source += '\n'; | 288 | source += '\n'; |
| 226 | 289 | ||
| 227 | if (program_type != ProgramType::Compute) { | 290 | if (!is_compute) { |
| 228 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | 291 | source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); |
| 229 | } | 292 | } |
| 230 | 293 | ||
| @@ -268,7 +331,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 268 | } | 331 | } |
| 269 | 332 | ||
| 270 | source += '\n'; | 333 | source += '\n'; |
| 271 | source += code; | 334 | source += GenerateGLSL(device, program_type, ir, ir_b); |
| 272 | 335 | ||
| 273 | OGLShader shader; | 336 | OGLShader shader; |
| 274 | shader.Create(source.c_str(), GetShaderType(program_type)); | 337 | shader.Create(source.c_str(), GetShaderType(program_type)); |
| @@ -278,85 +341,97 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
| 278 | return program; | 341 | return program; |
| 279 | } | 342 | } |
| 280 | 343 | ||
| 281 | std::set<GLenum> GetSupportedFormats() { | 344 | std::unordered_set<GLenum> GetSupportedFormats() { |
| 282 | std::set<GLenum> supported_formats; | ||
| 283 | |||
| 284 | GLint num_formats{}; | 345 | GLint num_formats{}; |
| 285 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | 346 | glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |
| 286 | 347 | ||
| 287 | std::vector<GLint> formats(num_formats); | 348 | std::vector<GLint> formats(num_formats); |
| 288 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | 349 | glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); |
| 289 | 350 | ||
| 290 | for (const GLint format : formats) | 351 | std::unordered_set<GLenum> supported_formats; |
| 352 | for (const GLint format : formats) { | ||
| 291 | supported_formats.insert(static_cast<GLenum>(format)); | 353 | supported_formats.insert(static_cast<GLenum>(format)); |
| 354 | } | ||
| 292 | return supported_formats; | 355 | return supported_formats; |
| 293 | } | 356 | } |
| 294 | 357 | ||
| 295 | } // Anonymous namespace | 358 | } // Anonymous namespace |
| 296 | 359 | ||
| 297 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, | 360 | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 298 | GLShader::ProgramResult result) | 361 | GLShader::ShaderEntries entries, ProgramCode program_code, |
| 299 | : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, | 362 | ProgramCode program_code_b) |
| 300 | unique_identifier{params.unique_identifier}, program_type{program_type}, | 363 | : RasterizerCacheObject{params.host_ptr}, system{params.system}, |
| 301 | disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, | 364 | disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, |
| 302 | entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} | 365 | unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, |
| 366 | program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { | ||
| 367 | if (!params.precompiled_variants) { | ||
| 368 | return; | ||
| 369 | } | ||
| 370 | for (const auto& pair : *params.precompiled_variants) { | ||
| 371 | auto locker = MakeLocker(system, program_type); | ||
| 372 | const auto& usage = pair->first; | ||
| 373 | FillLocker(*locker, usage); | ||
| 374 | |||
| 375 | std::unique_ptr<LockerVariant>* locker_variant = nullptr; | ||
| 376 | const auto it = | ||
| 377 | std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { | ||
| 378 | return variant->locker->HasEqualKeys(*locker); | ||
| 379 | }); | ||
| 380 | if (it == locker_variants.end()) { | ||
| 381 | locker_variant = &locker_variants.emplace_back(); | ||
| 382 | *locker_variant = std::make_unique<LockerVariant>(); | ||
| 383 | locker_variant->get()->locker = std::move(locker); | ||
| 384 | } else { | ||
| 385 | locker_variant = &*it; | ||
| 386 | } | ||
| 387 | locker_variant->get()->programs.emplace(usage.variant, pair->second); | ||
| 388 | } | ||
| 389 | } | ||
| 303 | 390 | ||
| 304 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 391 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, |
| 305 | Maxwell::ShaderProgram program_type, | 392 | Maxwell::ShaderProgram program_type, |
| 306 | ProgramCode&& program_code, | 393 | ProgramCode program_code, ProgramCode program_code_b) { |
| 307 | ProgramCode&& program_code_b) { | ||
| 308 | const auto code_size{CalculateProgramSize(program_code)}; | ||
| 309 | const auto code_size_b{CalculateProgramSize(program_code_b)}; | ||
| 310 | auto result{ | ||
| 311 | CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; | ||
| 312 | if (result.first.empty()) { | ||
| 313 | // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now | ||
| 314 | return {}; | ||
| 315 | } | ||
| 316 | |||
| 317 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | 394 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |
| 318 | params.unique_identifier, GetProgramType(program_type), | 395 | params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); |
| 319 | static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), | 396 | |
| 320 | std::move(program_code), std::move(program_code_b))); | 397 | ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type))); |
| 321 | 398 | const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); | |
| 399 | // TODO(Rodrigo): Handle VertexA shaders | ||
| 400 | // std::optional<ShaderIR> ir_b; | ||
| 401 | // if (!program_code_b.empty()) { | ||
| 402 | // ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); | ||
| 403 | // } | ||
| 322 | return std::shared_ptr<CachedShader>( | 404 | return std::shared_ptr<CachedShader>( |
| 323 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | 405 | new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), |
| 406 | std::move(program_code), std::move(program_code_b))); | ||
| 324 | } | 407 | } |
| 325 | 408 | ||
| 326 | Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | 409 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| 327 | Maxwell::ShaderProgram program_type, | 410 | params.disk_cache.SaveRaw( |
| 328 | GLShader::ProgramResult result) { | 411 | ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); |
| 329 | return std::shared_ptr<CachedShader>( | ||
| 330 | new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||
| 331 | } | ||
| 332 | |||
| 333 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { | ||
| 334 | auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; | ||
| 335 | |||
| 336 | const auto code_size{CalculateProgramSize(code)}; | ||
| 337 | params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, | ||
| 338 | static_cast<u32>(code_size / sizeof(u64)), 0, | ||
| 339 | std::move(code), {})); | ||
| 340 | 412 | ||
| 341 | return std::shared_ptr<CachedShader>( | 413 | ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute); |
| 342 | new CachedShader(params, ProgramType::Compute, std::move(result))); | 414 | const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); |
| 415 | return std::shared_ptr<CachedShader>(new CachedShader( | ||
| 416 | params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); | ||
| 343 | } | 417 | } |
| 344 | 418 | ||
| 345 | Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, | 419 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 346 | GLShader::ProgramResult result) { | 420 | const UnspecializedShader& unspecialized) { |
| 347 | return std::shared_ptr<CachedShader>( | 421 | return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, |
| 348 | new CachedShader(params, ProgramType::Compute, std::move(result))); | 422 | unspecialized.entries, unspecialized.code, |
| 423 | unspecialized.code_b)); | ||
| 349 | } | 424 | } |
| 350 | 425 | ||
| 351 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | 426 | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { |
| 352 | const auto [entry, is_cache_miss] = programs.try_emplace(variant); | 427 | UpdateVariant(); |
| 428 | |||
| 429 | const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); | ||
| 353 | auto& program = entry->second; | 430 | auto& program = entry->second; |
| 354 | if (is_cache_miss) { | 431 | if (is_cache_miss) { |
| 355 | program = TryLoadProgram(variant); | 432 | program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, |
| 356 | if (!program) { | 433 | variant, *curr_variant->locker); |
| 357 | program = SpecializeShader(code, entries, program_type, variant); | 434 | disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); |
| 358 | disk_cache.SaveUsage(GetUsage(variant)); | ||
| 359 | } | ||
| 360 | 435 | ||
| 361 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | 436 | LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); |
| 362 | } | 437 | } |
| @@ -372,18 +447,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||
| 372 | return {program->handle, base_bindings}; | 447 | return {program->handle, base_bindings}; |
| 373 | } | 448 | } |
| 374 | 449 | ||
| 375 | CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { | 450 | void CachedShader::UpdateVariant() { |
| 376 | const auto found = precompiled_programs.find(GetUsage(variant)); | 451 | if (curr_variant && !curr_variant->locker->IsConsistent()) { |
| 377 | if (found == precompiled_programs.end()) { | 452 | curr_variant = nullptr; |
| 378 | return {}; | 453 | } |
| 454 | if (!curr_variant) { | ||
| 455 | for (auto& variant : locker_variants) { | ||
| 456 | if (variant->locker->IsConsistent()) { | ||
| 457 | curr_variant = variant.get(); | ||
| 458 | } | ||
| 459 | } | ||
| 460 | } | ||
| 461 | if (!curr_variant) { | ||
| 462 | auto& new_variant = locker_variants.emplace_back(); | ||
| 463 | new_variant = std::make_unique<LockerVariant>(); | ||
| 464 | new_variant->locker = MakeLocker(system, program_type); | ||
| 465 | curr_variant = new_variant.get(); | ||
| 379 | } | 466 | } |
| 380 | return found->second; | ||
| 381 | } | 467 | } |
| 382 | 468 | ||
| 383 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { | 469 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, |
| 470 | const ConstBufferLocker& locker) const { | ||
| 384 | ShaderDiskCacheUsage usage; | 471 | ShaderDiskCacheUsage usage; |
| 385 | usage.unique_identifier = unique_identifier; | 472 | usage.unique_identifier = unique_identifier; |
| 386 | usage.variant = variant; | 473 | usage.variant = variant; |
| 474 | usage.keys = locker.GetKeys(); | ||
| 475 | usage.bound_samplers = locker.GetBoundSamplers(); | ||
| 476 | usage.bindless_samplers = locker.GetBindlessSamplers(); | ||
| 387 | return usage; | 477 | return usage; |
| 388 | } | 478 | } |
| 389 | 479 | ||
| @@ -399,18 +489,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 399 | return; | 489 | return; |
| 400 | } | 490 | } |
| 401 | const auto [raws, shader_usages] = *transferable; | 491 | const auto [raws, shader_usages] = *transferable; |
| 402 | 492 | if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { | |
| 403 | auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); | ||
| 404 | |||
| 405 | const auto supported_formats{GetSupportedFormats()}; | ||
| 406 | const auto unspecialized_shaders{ | ||
| 407 | GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; | ||
| 408 | if (stop_loading) { | ||
| 409 | return; | 493 | return; |
| 410 | } | 494 | } |
| 411 | 495 | ||
| 412 | // Track if precompiled cache was altered during loading to know if we have to serialize the | 496 | const auto dumps = disk_cache.LoadPrecompiled(); |
| 413 | // virtual precompiled cache file back to the hard drive | 497 | const auto supported_formats = GetSupportedFormats(); |
| 498 | |||
| 499 | // Track if precompiled cache was altered during loading to know if we have to | ||
| 500 | // serialize the virtual precompiled cache file back to the hard drive | ||
| 414 | bool precompiled_cache_altered = false; | 501 | bool precompiled_cache_altered = false; |
| 415 | 502 | ||
| 416 | // Inform the frontend about shader build initialization | 503 | // Inform the frontend about shader build initialization |
| @@ -433,9 +520,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 433 | return; | 520 | return; |
| 434 | } | 521 | } |
| 435 | const auto& usage{shader_usages[i]}; | 522 | const auto& usage{shader_usages[i]}; |
| 436 | LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", | ||
| 437 | usage.unique_identifier, i, shader_usages.size()); | ||
| 438 | |||
| 439 | const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; | 523 | const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; |
| 440 | const auto dump{dumps.find(usage)}; | 524 | const auto dump{dumps.find(usage)}; |
| 441 | 525 | ||
| @@ -449,21 +533,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 449 | } | 533 | } |
| 450 | } | 534 | } |
| 451 | if (!shader) { | 535 | if (!shader) { |
| 452 | shader = SpecializeShader(unspecialized.code, unspecialized.entries, | 536 | auto locker{MakeLocker(system, unspecialized.program_type)}; |
| 453 | unspecialized.program_type, usage.variant, true); | 537 | FillLocker(*locker, usage); |
| 538 | shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, | ||
| 539 | unspecialized.code, unspecialized.code_b, usage.variant, | ||
| 540 | *locker, true); | ||
| 454 | } | 541 | } |
| 455 | 542 | ||
| 456 | std::scoped_lock lock(mutex); | 543 | std::scoped_lock lock{mutex}; |
| 457 | if (callback) { | 544 | if (callback) { |
| 458 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | 545 | callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, |
| 459 | shader_usages.size()); | 546 | shader_usages.size()); |
| 460 | } | 547 | } |
| 461 | 548 | ||
| 462 | precompiled_programs.emplace(usage, std::move(shader)); | 549 | precompiled_programs.emplace(usage, std::move(shader)); |
| 550 | |||
| 551 | // TODO(Rodrigo): Is there a better way to do this? | ||
| 552 | precompiled_variants[usage.unique_identifier].push_back( | ||
| 553 | precompiled_programs.find(usage)); | ||
| 463 | } | 554 | } |
| 464 | }; | 555 | }; |
| 465 | 556 | ||
| 466 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; | 557 | const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; |
| 467 | const std::size_t bucket_size{shader_usages.size() / num_workers}; | 558 | const std::size_t bucket_size{shader_usages.size() / num_workers}; |
| 468 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | 559 | std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); |
| 469 | std::vector<std::thread> threads(num_workers); | 560 | std::vector<std::thread> threads(num_workers); |
| @@ -483,7 +574,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 483 | if (compilation_failed) { | 574 | if (compilation_failed) { |
| 484 | // Invalidate the precompiled cache if a shader dumped shader was rejected | 575 | // Invalidate the precompiled cache if a shader dumped shader was rejected |
| 485 | disk_cache.InvalidatePrecompiled(); | 576 | disk_cache.InvalidatePrecompiled(); |
| 486 | dumps.clear(); | ||
| 487 | precompiled_cache_altered = true; | 577 | precompiled_cache_altered = true; |
| 488 | return; | 578 | return; |
| 489 | } | 579 | } |
| @@ -491,8 +581,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 491 | return; | 581 | return; |
| 492 | } | 582 | } |
| 493 | 583 | ||
| 494 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before | 584 | // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw |
| 495 | // precompiling them | 585 | // before precompiling them |
| 496 | 586 | ||
| 497 | for (std::size_t i = 0; i < shader_usages.size(); ++i) { | 587 | for (std::size_t i = 0; i < shader_usages.size(); ++i) { |
| 498 | const auto& usage{shader_usages[i]}; | 588 | const auto& usage{shader_usages[i]}; |
| @@ -508,9 +598,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||
| 508 | } | 598 | } |
| 509 | } | 599 | } |
| 510 | 600 | ||
| 511 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | 601 | const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { |
| 512 | const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { | 602 | const auto it = precompiled_variants.find(unique_identifier); |
| 603 | return it == precompiled_variants.end() ? nullptr : &it->second; | ||
| 604 | } | ||
| 513 | 605 | ||
| 606 | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||
| 607 | const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { | ||
| 514 | if (supported_formats.find(dump.binary_format) == supported_formats.end()) { | 608 | if (supported_formats.find(dump.binary_format) == supported_formats.end()) { |
| 515 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); | 609 | LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); |
| 516 | return {}; | 610 | return {}; |
| @@ -532,56 +626,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||
| 532 | return shader; | 626 | return shader; |
| 533 | } | 627 | } |
| 534 | 628 | ||
| 535 | std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( | 629 | bool ShaderCacheOpenGL::GenerateUnspecializedShaders( |
| 536 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | 630 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, |
| 537 | const std::vector<ShaderDiskCacheRaw>& raws, | 631 | const std::vector<ShaderDiskCacheRaw>& raws) { |
| 538 | const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { | ||
| 539 | std::unordered_map<u64, UnspecializedShader> unspecialized; | ||
| 540 | |||
| 541 | if (callback) { | 632 | if (callback) { |
| 542 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); | 633 | callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); |
| 543 | } | 634 | } |
| 544 | 635 | ||
| 545 | for (std::size_t i = 0; i < raws.size(); ++i) { | 636 | for (std::size_t i = 0; i < raws.size(); ++i) { |
| 546 | if (stop_loading) { | 637 | if (stop_loading) { |
| 547 | return {}; | 638 | return false; |
| 548 | } | 639 | } |
| 549 | const auto& raw{raws[i]}; | 640 | const auto& raw{raws[i]}; |
| 550 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; | 641 | const u64 unique_identifier{raw.GetUniqueIdentifier()}; |
| 551 | const u64 calculated_hash{ | 642 | const u64 calculated_hash{ |
| 552 | GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; | 643 | GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; |
| 553 | if (unique_identifier != calculated_hash) { | 644 | if (unique_identifier != calculated_hash) { |
| 554 | LOG_ERROR( | 645 | LOG_ERROR(Render_OpenGL, |
| 555 | Render_OpenGL, | 646 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - " |
| 556 | "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", | 647 | "removing shader cache", |
| 557 | raw.GetUniqueIdentifier(), calculated_hash); | 648 | raw.GetUniqueIdentifier(), calculated_hash); |
| 558 | disk_cache.InvalidateTransferable(); | 649 | disk_cache.InvalidateTransferable(); |
| 559 | return {}; | 650 | return false; |
| 560 | } | 651 | } |
| 561 | 652 | ||
| 562 | GLShader::ProgramResult result; | 653 | const u32 main_offset = |
| 563 | if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { | 654 | raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; |
| 564 | // If it's stored in the precompiled file, avoid decompiling it here | 655 | ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); |
| 565 | const auto& stored_decompiled{it->second}; | 656 | const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); |
| 566 | result = {stored_decompiled.code, stored_decompiled.entries}; | 657 | // TODO(Rodrigo): Handle VertexA shaders |
| 567 | } else { | 658 | // std::optional<ShaderIR> ir_b; |
| 568 | // Otherwise decompile the shader at boot and save the result to the decompiled file | 659 | // if (raw.HasProgramA()) { |
| 569 | result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), | 660 | // ir_b.emplace(raw.GetProgramCodeB(), main_offset); |
| 570 | raw.GetProgramCodeB()); | 661 | // } |
| 571 | disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); | 662 | |
| 572 | } | 663 | UnspecializedShader unspecialized; |
| 573 | 664 | unspecialized.entries = GLShader::GetEntries(ir); | |
| 574 | precompiled_shaders.insert({unique_identifier, result}); | 665 | unspecialized.program_type = raw.GetProgramType(); |
| 575 | 666 | unspecialized.code = raw.GetProgramCode(); | |
| 576 | unspecialized.insert( | 667 | unspecialized.code_b = raw.GetProgramCodeB(); |
| 577 | {raw.GetUniqueIdentifier(), | 668 | unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); |
| 578 | {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); | ||
| 579 | 669 | ||
| 580 | if (callback) { | 670 | if (callback) { |
| 581 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); | 671 | callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); |
| 582 | } | 672 | } |
| 583 | } | 673 | } |
| 584 | return unspecialized; | 674 | return true; |
| 585 | } | 675 | } |
| 586 | 676 | ||
| 587 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 677 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| @@ -590,37 +680,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 590 | } | 680 | } |
| 591 | 681 | ||
| 592 | auto& memory_manager{system.GPU().MemoryManager()}; | 682 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 593 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | 683 | const GPUVAddr address{GetShaderAddress(system, program)}; |
| 594 | 684 | ||
| 595 | // Look up shader in the cache based on address | 685 | // Look up shader in the cache based on address |
| 596 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | 686 | const auto host_ptr{memory_manager.GetPointer(address)}; |
| 597 | Shader shader{TryGet(host_ptr)}; | 687 | Shader shader{TryGet(host_ptr)}; |
| 598 | if (shader) { | 688 | if (shader) { |
| 599 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 689 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 600 | } | 690 | } |
| 601 | 691 | ||
| 602 | // No shader found - create a new one | 692 | // No shader found - create a new one |
| 603 | ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | 693 | ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; |
| 604 | ProgramCode program_code_b; | 694 | ProgramCode code_b; |
| 605 | const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; | 695 | if (program == Maxwell::ShaderProgram::VertexA) { |
| 606 | if (is_program_a) { | 696 | const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |
| 607 | const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | 697 | code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); |
| 608 | program_code_b = GetShaderCode(memory_manager, program_addr_b, | 698 | } |
| 609 | memory_manager.GetPointer(program_addr_b)); | 699 | |
| 610 | } | 700 | const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); |
| 611 | 701 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | |
| 612 | const auto unique_identifier = | 702 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; |
| 613 | GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); | 703 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, |
| 614 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | 704 | cpu_addr, host_ptr, unique_identifier}; |
| 615 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | 705 | |
| 616 | host_ptr, unique_identifier}; | 706 | const auto found = unspecialized_shaders.find(unique_identifier); |
| 617 | 707 | if (found == unspecialized_shaders.end()) { | |
| 618 | const auto found = precompiled_shaders.find(unique_identifier); | 708 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), |
| 619 | if (found == precompiled_shaders.end()) { | 709 | std::move(code_b)); |
| 620 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code), | ||
| 621 | std::move(program_code_b)); | ||
| 622 | } else { | 710 | } else { |
| 623 | shader = CachedShader::CreateStageFromCache(params, program, found->second); | 711 | shader = CachedShader::CreateFromCache(params, found->second); |
| 624 | } | 712 | } |
| 625 | Register(shader); | 713 | Register(shader); |
| 626 | 714 | ||
| @@ -638,15 +726,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 638 | // No kernel found - create a new one | 726 | // No kernel found - create a new one |
| 639 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 727 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 640 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | 728 | const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; |
| 729 | const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||
| 641 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 730 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; |
| 642 | const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | 731 | const ShaderParameters params{system, disk_cache, precompiled_variants, device, |
| 643 | host_ptr, unique_identifier}; | 732 | cpu_addr, host_ptr, unique_identifier}; |
| 644 | 733 | ||
| 645 | const auto found = precompiled_shaders.find(unique_identifier); | 734 | const auto found = unspecialized_shaders.find(unique_identifier); |
| 646 | if (found == precompiled_shaders.end()) { | 735 | if (found == unspecialized_shaders.end()) { |
| 647 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | 736 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); |
| 648 | } else { | 737 | } else { |
| 649 | kernel = CachedShader::CreateKernelFromCache(params, found->second); | 738 | kernel = CachedShader::CreateFromCache(params, found->second); |
| 650 | } | 739 | } |
| 651 | 740 | ||
| 652 | Register(kernel); | 741 | Register(kernel); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index de195cc5d..6bd7c9cf1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -8,9 +8,10 @@ | |||
| 8 | #include <atomic> | 8 | #include <atomic> |
| 9 | #include <bitset> | 9 | #include <bitset> |
| 10 | #include <memory> | 10 | #include <memory> |
| 11 | #include <set> | 11 | #include <string> |
| 12 | #include <tuple> | 12 | #include <tuple> |
| 13 | #include <unordered_map> | 13 | #include <unordered_map> |
| 14 | #include <unordered_set> | ||
| 14 | #include <vector> | 15 | #include <vector> |
| 15 | 16 | ||
| 16 | #include <glad/glad.h> | 17 | #include <glad/glad.h> |
| @@ -20,6 +21,8 @@ | |||
| 20 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 22 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 23 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 24 | #include "video_core/shader/const_buffer_locker.h" | ||
| 25 | #include "video_core/shader/shader_ir.h" | ||
| 23 | 26 | ||
| 24 | namespace Core { | 27 | namespace Core { |
| 25 | class System; | 28 | class System; |
| @@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>; | |||
| 40 | using CachedProgram = std::shared_ptr<OGLProgram>; | 43 | using CachedProgram = std::shared_ptr<OGLProgram>; |
| 41 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 42 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | 45 | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; |
| 43 | using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | 46 | using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; |
| 47 | |||
| 48 | struct UnspecializedShader { | ||
| 49 | GLShader::ShaderEntries entries; | ||
| 50 | ProgramType program_type; | ||
| 51 | ProgramCode code; | ||
| 52 | ProgramCode code_b; | ||
| 53 | }; | ||
| 44 | 54 | ||
| 45 | struct ShaderParameters { | 55 | struct ShaderParameters { |
| 56 | Core::System& system; | ||
| 46 | ShaderDiskCacheOpenGL& disk_cache; | 57 | ShaderDiskCacheOpenGL& disk_cache; |
| 47 | const PrecompiledPrograms& precompiled_programs; | 58 | const PrecompiledVariants* precompiled_variants; |
| 48 | const Device& device; | 59 | const Device& device; |
| 49 | VAddr cpu_addr; | 60 | VAddr cpu_addr; |
| 50 | u8* host_ptr; | 61 | u8* host_ptr; |
| @@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject { | |||
| 55 | public: | 66 | public: |
| 56 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 67 | static Shader CreateStageFromMemory(const ShaderParameters& params, |
| 57 | Maxwell::ShaderProgram program_type, | 68 | Maxwell::ShaderProgram program_type, |
| 58 | ProgramCode&& program_code, ProgramCode&& program_code_b); | 69 | ProgramCode program_code, ProgramCode program_code_b); |
| 59 | 70 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | |
| 60 | static Shader CreateStageFromCache(const ShaderParameters& params, | ||
| 61 | Maxwell::ShaderProgram program_type, | ||
| 62 | GLShader::ProgramResult result); | ||
| 63 | 71 | ||
| 64 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); | 72 | static Shader CreateFromCache(const ShaderParameters& params, |
| 65 | 73 | const UnspecializedShader& unspecialized); | |
| 66 | static Shader CreateKernelFromCache(const ShaderParameters& params, | ||
| 67 | GLShader::ProgramResult result); | ||
| 68 | 74 | ||
| 69 | VAddr GetCpuAddr() const override { | 75 | VAddr GetCpuAddr() const override { |
| 70 | return cpu_addr; | 76 | return cpu_addr; |
| 71 | } | 77 | } |
| 72 | 78 | ||
| 73 | std::size_t GetSizeInBytes() const override { | 79 | std::size_t GetSizeInBytes() const override { |
| 74 | return shader_length; | 80 | return program_code.size() * sizeof(u64); |
| 75 | } | 81 | } |
| 76 | 82 | ||
| 77 | /// Gets the shader entries for the shader | 83 | /// Gets the shader entries for the shader |
| @@ -83,24 +89,36 @@ public: | |||
| 83 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | 89 | std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |
| 84 | 90 | ||
| 85 | private: | 91 | private: |
| 92 | struct LockerVariant { | ||
| 93 | std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; | ||
| 94 | std::unordered_map<ProgramVariant, CachedProgram> programs; | ||
| 95 | }; | ||
| 96 | |||
| 86 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, | 97 | explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |
| 87 | GLShader::ProgramResult result); | 98 | GLShader::ShaderEntries entries, ProgramCode program_code, |
| 99 | ProgramCode program_code_b); | ||
| 88 | 100 | ||
| 89 | CachedProgram TryLoadProgram(const ProgramVariant& variant) const; | 101 | void UpdateVariant(); |
| 90 | 102 | ||
| 91 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; | 103 | ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, |
| 104 | const VideoCommon::Shader::ConstBufferLocker& locker) const; | ||
| 105 | |||
| 106 | Core::System& system; | ||
| 107 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 108 | const Device& device; | ||
| 92 | 109 | ||
| 93 | VAddr cpu_addr{}; | 110 | VAddr cpu_addr{}; |
| 111 | |||
| 94 | u64 unique_identifier{}; | 112 | u64 unique_identifier{}; |
| 95 | ProgramType program_type{}; | 113 | ProgramType program_type{}; |
| 96 | ShaderDiskCacheOpenGL& disk_cache; | ||
| 97 | const PrecompiledPrograms& precompiled_programs; | ||
| 98 | 114 | ||
| 99 | GLShader::ShaderEntries entries; | 115 | GLShader::ShaderEntries entries; |
| 100 | std::string code; | ||
| 101 | std::size_t shader_length{}; | ||
| 102 | 116 | ||
| 103 | std::unordered_map<ProgramVariant, CachedProgram> programs; | 117 | ProgramCode program_code; |
| 118 | ProgramCode program_code_b; | ||
| 119 | |||
| 120 | LockerVariant* curr_variant = nullptr; | ||
| 121 | std::vector<std::unique_ptr<LockerVariant>> locker_variants; | ||
| 104 | }; | 122 | }; |
| 105 | 123 | ||
| 106 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 124 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { |
| @@ -123,21 +141,26 @@ protected: | |||
| 123 | void FlushObjectInner(const Shader& object) override {} | 141 | void FlushObjectInner(const Shader& object) override {} |
| 124 | 142 | ||
| 125 | private: | 143 | private: |
| 126 | std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( | 144 | bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, |
| 127 | const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | 145 | const VideoCore::DiskResourceLoadCallback& callback, |
| 128 | const std::vector<ShaderDiskCacheRaw>& raws, | 146 | const std::vector<ShaderDiskCacheRaw>& raws); |
| 129 | const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled); | ||
| 130 | 147 | ||
| 131 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | 148 | CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, |
| 132 | const std::set<GLenum>& supported_formats); | 149 | const std::unordered_set<GLenum>& supported_formats); |
| 150 | |||
| 151 | const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; | ||
| 133 | 152 | ||
| 134 | Core::System& system; | 153 | Core::System& system; |
| 135 | Core::Frontend::EmuWindow& emu_window; | 154 | Core::Frontend::EmuWindow& emu_window; |
| 136 | const Device& device; | 155 | const Device& device; |
| 156 | |||
| 137 | ShaderDiskCacheOpenGL disk_cache; | 157 | ShaderDiskCacheOpenGL disk_cache; |
| 138 | 158 | ||
| 139 | PrecompiledShaders precompiled_shaders; | ||
| 140 | PrecompiledPrograms precompiled_programs; | 159 | PrecompiledPrograms precompiled_programs; |
| 160 | std::unordered_map<u64, PrecompiledVariants> precompiled_variants; | ||
| 161 | |||
| 162 | std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; | ||
| 163 | |||
| 141 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 164 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 142 | }; | 165 | }; |
| 143 | 166 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6a610a3bc..030550c53 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -415,27 +415,6 @@ public: | |||
| 415 | return code.GetResult(); | 415 | return code.GetResult(); |
| 416 | } | 416 | } |
| 417 | 417 | ||
| 418 | ShaderEntries GetShaderEntries() const { | ||
| 419 | ShaderEntries entries; | ||
| 420 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 421 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | ||
| 422 | cbuf.first); | ||
| 423 | } | ||
| 424 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 425 | entries.samplers.emplace_back(sampler); | ||
| 426 | } | ||
| 427 | for (const auto& [offset, image] : ir.GetImages()) { | ||
| 428 | entries.images.emplace_back(image); | ||
| 429 | } | ||
| 430 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 431 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, | ||
| 432 | usage.is_read, usage.is_written); | ||
| 433 | } | ||
| 434 | entries.clip_distances = ir.GetClipDistances(); | ||
| 435 | entries.shader_length = ir.GetLength(); | ||
| 436 | return entries; | ||
| 437 | } | ||
| 438 | |||
| 439 | private: | 418 | private: |
| 440 | friend class ASTDecompiler; | 419 | friend class ASTDecompiler; |
| 441 | friend class ExprDecompiler; | 420 | friend class ExprDecompiler; |
| @@ -1148,7 +1127,7 @@ private: | |||
| 1148 | for (const auto& variant : extras) { | 1127 | for (const auto& variant : extras) { |
| 1149 | if (const auto argument = std::get_if<TextureArgument>(&variant)) { | 1128 | if (const auto argument = std::get_if<TextureArgument>(&variant)) { |
| 1150 | expr += GenerateTextureArgument(*argument); | 1129 | expr += GenerateTextureArgument(*argument); |
| 1151 | } else if (std::get_if<TextureAoffi>(&variant)) { | 1130 | } else if (std::holds_alternative<TextureAoffi>(variant)) { |
| 1152 | expr += GenerateTextureAoffi(meta->aoffi); | 1131 | expr += GenerateTextureAoffi(meta->aoffi); |
| 1153 | } else { | 1132 | } else { |
| 1154 | UNREACHABLE(); | 1133 | UNREACHABLE(); |
| @@ -1158,8 +1137,8 @@ private: | |||
| 1158 | return expr + ')'; | 1137 | return expr + ')'; |
| 1159 | } | 1138 | } |
| 1160 | 1139 | ||
| 1161 | std::string GenerateTextureArgument(TextureArgument argument) { | 1140 | std::string GenerateTextureArgument(const TextureArgument& argument) { |
| 1162 | const auto [type, operand] = argument; | 1141 | const auto& [type, operand] = argument; |
| 1163 | if (operand == nullptr) { | 1142 | if (operand == nullptr) { |
| 1164 | return {}; | 1143 | return {}; |
| 1165 | } | 1144 | } |
| @@ -1235,7 +1214,7 @@ private: | |||
| 1235 | 1214 | ||
| 1236 | std::string BuildImageValues(Operation operation) { | 1215 | std::string BuildImageValues(Operation operation) { |
| 1237 | constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; | 1216 | constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"}; |
| 1238 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | 1217 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1239 | 1218 | ||
| 1240 | const std::size_t values_count{meta.values.size()}; | 1219 | const std::size_t values_count{meta.values.size()}; |
| 1241 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); | 1220 | std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); |
| @@ -1780,14 +1759,14 @@ private: | |||
| 1780 | return {"0", Type::Int}; | 1759 | return {"0", Type::Int}; |
| 1781 | } | 1760 | } |
| 1782 | 1761 | ||
| 1783 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | 1762 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1784 | return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), | 1763 | return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), |
| 1785 | BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), | 1764 | BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), |
| 1786 | Type::Uint}; | 1765 | Type::Uint}; |
| 1787 | } | 1766 | } |
| 1788 | 1767 | ||
| 1789 | Expression ImageStore(Operation operation) { | 1768 | Expression ImageStore(Operation operation) { |
| 1790 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | 1769 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1791 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), | 1770 | code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), |
| 1792 | BuildIntegerCoordinates(operation), BuildImageValues(operation)); | 1771 | BuildIntegerCoordinates(operation), BuildImageValues(operation)); |
| 1793 | return {}; | 1772 | return {}; |
| @@ -1795,7 +1774,7 @@ private: | |||
| 1795 | 1774 | ||
| 1796 | template <const std::string_view& opname> | 1775 | template <const std::string_view& opname> |
| 1797 | Expression AtomicImage(Operation operation) { | 1776 | Expression AtomicImage(Operation operation) { |
| 1798 | const auto meta{std::get<MetaImage>(operation.GetMeta())}; | 1777 | const auto& meta{std::get<MetaImage>(operation.GetMeta())}; |
| 1799 | ASSERT(meta.values.size() == 1); | 1778 | ASSERT(meta.values.size() == 1); |
| 1800 | 1779 | ||
| 1801 | return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), | 1780 | return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image), |
| @@ -2246,7 +2225,7 @@ private: | |||
| 2246 | code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); | 2225 | code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex()); |
| 2247 | } | 2226 | } |
| 2248 | 2227 | ||
| 2249 | std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const { | 2228 | std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { |
| 2250 | return fmt::format("{}_{}_{}", name, index, suffix); | 2229 | return fmt::format("{}_{}_{}", name, index, suffix); |
| 2251 | } | 2230 | } |
| 2252 | 2231 | ||
| @@ -2271,17 +2250,15 @@ private: | |||
| 2271 | ShaderWriter code; | 2250 | ShaderWriter code; |
| 2272 | }; | 2251 | }; |
| 2273 | 2252 | ||
| 2274 | static constexpr std::string_view flow_var = "flow_var_"; | ||
| 2275 | |||
| 2276 | std::string GetFlowVariable(u32 i) { | 2253 | std::string GetFlowVariable(u32 i) { |
| 2277 | return fmt::format("{}{}", flow_var, i); | 2254 | return fmt::format("flow_var_{}", i); |
| 2278 | } | 2255 | } |
| 2279 | 2256 | ||
| 2280 | class ExprDecompiler { | 2257 | class ExprDecompiler { |
| 2281 | public: | 2258 | public: |
| 2282 | explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} | 2259 | explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} |
| 2283 | 2260 | ||
| 2284 | void operator()(VideoCommon::Shader::ExprAnd& expr) { | 2261 | void operator()(const ExprAnd& expr) { |
| 2285 | inner += "( "; | 2262 | inner += "( "; |
| 2286 | std::visit(*this, *expr.operand1); | 2263 | std::visit(*this, *expr.operand1); |
| 2287 | inner += " && "; | 2264 | inner += " && "; |
| @@ -2289,7 +2266,7 @@ public: | |||
| 2289 | inner += ')'; | 2266 | inner += ')'; |
| 2290 | } | 2267 | } |
| 2291 | 2268 | ||
| 2292 | void operator()(VideoCommon::Shader::ExprOr& expr) { | 2269 | void operator()(const ExprOr& expr) { |
| 2293 | inner += "( "; | 2270 | inner += "( "; |
| 2294 | std::visit(*this, *expr.operand1); | 2271 | std::visit(*this, *expr.operand1); |
| 2295 | inner += " || "; | 2272 | inner += " || "; |
| @@ -2297,17 +2274,17 @@ public: | |||
| 2297 | inner += ')'; | 2274 | inner += ')'; |
| 2298 | } | 2275 | } |
| 2299 | 2276 | ||
| 2300 | void operator()(VideoCommon::Shader::ExprNot& expr) { | 2277 | void operator()(const ExprNot& expr) { |
| 2301 | inner += '!'; | 2278 | inner += '!'; |
| 2302 | std::visit(*this, *expr.operand1); | 2279 | std::visit(*this, *expr.operand1); |
| 2303 | } | 2280 | } |
| 2304 | 2281 | ||
| 2305 | void operator()(VideoCommon::Shader::ExprPredicate& expr) { | 2282 | void operator()(const ExprPredicate& expr) { |
| 2306 | const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); | 2283 | const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); |
| 2307 | inner += decomp.GetPredicate(pred); | 2284 | inner += decomp.GetPredicate(pred); |
| 2308 | } | 2285 | } |
| 2309 | 2286 | ||
| 2310 | void operator()(VideoCommon::Shader::ExprCondCode& expr) { | 2287 | void operator()(const ExprCondCode& expr) { |
| 2311 | const Node cc = decomp.ir.GetConditionCode(expr.cc); | 2288 | const Node cc = decomp.ir.GetConditionCode(expr.cc); |
| 2312 | std::string target; | 2289 | std::string target; |
| 2313 | 2290 | ||
| @@ -2316,10 +2293,13 @@ public: | |||
| 2316 | switch (index) { | 2293 | switch (index) { |
| 2317 | case Tegra::Shader::Pred::NeverExecute: | 2294 | case Tegra::Shader::Pred::NeverExecute: |
| 2318 | target = "false"; | 2295 | target = "false"; |
| 2296 | break; | ||
| 2319 | case Tegra::Shader::Pred::UnusedIndex: | 2297 | case Tegra::Shader::Pred::UnusedIndex: |
| 2320 | target = "true"; | 2298 | target = "true"; |
| 2299 | break; | ||
| 2321 | default: | 2300 | default: |
| 2322 | target = decomp.GetPredicate(index); | 2301 | target = decomp.GetPredicate(index); |
| 2302 | break; | ||
| 2323 | } | 2303 | } |
| 2324 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { | 2304 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { |
| 2325 | target = decomp.GetInternalFlag(flag->GetFlag()); | 2305 | target = decomp.GetInternalFlag(flag->GetFlag()); |
| @@ -2329,15 +2309,20 @@ public: | |||
| 2329 | inner += target; | 2309 | inner += target; |
| 2330 | } | 2310 | } |
| 2331 | 2311 | ||
| 2332 | void operator()(VideoCommon::Shader::ExprVar& expr) { | 2312 | void operator()(const ExprVar& expr) { |
| 2333 | inner += GetFlowVariable(expr.var_index); | 2313 | inner += GetFlowVariable(expr.var_index); |
| 2334 | } | 2314 | } |
| 2335 | 2315 | ||
| 2336 | void operator()(VideoCommon::Shader::ExprBoolean& expr) { | 2316 | void operator()(const ExprBoolean& expr) { |
| 2337 | inner += expr.value ? "true" : "false"; | 2317 | inner += expr.value ? "true" : "false"; |
| 2338 | } | 2318 | } |
| 2339 | 2319 | ||
| 2340 | std::string& GetResult() { | 2320 | void operator()(VideoCommon::Shader::ExprGprEqual& expr) { |
| 2321 | inner += | ||
| 2322 | "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')'; | ||
| 2323 | } | ||
| 2324 | |||
| 2325 | const std::string& GetResult() const { | ||
| 2341 | return inner; | 2326 | return inner; |
| 2342 | } | 2327 | } |
| 2343 | 2328 | ||
| @@ -2350,7 +2335,7 @@ class ASTDecompiler { | |||
| 2350 | public: | 2335 | public: |
| 2351 | explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} | 2336 | explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} |
| 2352 | 2337 | ||
| 2353 | void operator()(VideoCommon::Shader::ASTProgram& ast) { | 2338 | void operator()(const ASTProgram& ast) { |
| 2354 | ASTNode current = ast.nodes.GetFirst(); | 2339 | ASTNode current = ast.nodes.GetFirst(); |
| 2355 | while (current) { | 2340 | while (current) { |
| 2356 | Visit(current); | 2341 | Visit(current); |
| @@ -2358,7 +2343,7 @@ public: | |||
| 2358 | } | 2343 | } |
| 2359 | } | 2344 | } |
| 2360 | 2345 | ||
| 2361 | void operator()(VideoCommon::Shader::ASTIfThen& ast) { | 2346 | void operator()(const ASTIfThen& ast) { |
| 2362 | ExprDecompiler expr_parser{decomp}; | 2347 | ExprDecompiler expr_parser{decomp}; |
| 2363 | std::visit(expr_parser, *ast.condition); | 2348 | std::visit(expr_parser, *ast.condition); |
| 2364 | decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); | 2349 | decomp.code.AddLine("if ({}) {{", expr_parser.GetResult()); |
| @@ -2372,7 +2357,7 @@ public: | |||
| 2372 | decomp.code.AddLine("}}"); | 2357 | decomp.code.AddLine("}}"); |
| 2373 | } | 2358 | } |
| 2374 | 2359 | ||
| 2375 | void operator()(VideoCommon::Shader::ASTIfElse& ast) { | 2360 | void operator()(const ASTIfElse& ast) { |
| 2376 | decomp.code.AddLine("else {{"); | 2361 | decomp.code.AddLine("else {{"); |
| 2377 | decomp.code.scope++; | 2362 | decomp.code.scope++; |
| 2378 | ASTNode current = ast.nodes.GetFirst(); | 2363 | ASTNode current = ast.nodes.GetFirst(); |
| @@ -2384,29 +2369,29 @@ public: | |||
| 2384 | decomp.code.AddLine("}}"); | 2369 | decomp.code.AddLine("}}"); |
| 2385 | } | 2370 | } |
| 2386 | 2371 | ||
| 2387 | void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { | 2372 | void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { |
| 2388 | UNREACHABLE(); | 2373 | UNREACHABLE(); |
| 2389 | } | 2374 | } |
| 2390 | 2375 | ||
| 2391 | void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { | 2376 | void operator()(const ASTBlockDecoded& ast) { |
| 2392 | decomp.VisitBlock(ast.nodes); | 2377 | decomp.VisitBlock(ast.nodes); |
| 2393 | } | 2378 | } |
| 2394 | 2379 | ||
| 2395 | void operator()(VideoCommon::Shader::ASTVarSet& ast) { | 2380 | void operator()(const ASTVarSet& ast) { |
| 2396 | ExprDecompiler expr_parser{decomp}; | 2381 | ExprDecompiler expr_parser{decomp}; |
| 2397 | std::visit(expr_parser, *ast.condition); | 2382 | std::visit(expr_parser, *ast.condition); |
| 2398 | decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); | 2383 | decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult()); |
| 2399 | } | 2384 | } |
| 2400 | 2385 | ||
| 2401 | void operator()(VideoCommon::Shader::ASTLabel& ast) { | 2386 | void operator()(const ASTLabel& ast) { |
| 2402 | decomp.code.AddLine("// Label_{}:", ast.index); | 2387 | decomp.code.AddLine("// Label_{}:", ast.index); |
| 2403 | } | 2388 | } |
| 2404 | 2389 | ||
| 2405 | void operator()(VideoCommon::Shader::ASTGoto& ast) { | 2390 | void operator()([[maybe_unused]] const ASTGoto& ast) { |
| 2406 | UNREACHABLE(); | 2391 | UNREACHABLE(); |
| 2407 | } | 2392 | } |
| 2408 | 2393 | ||
| 2409 | void operator()(VideoCommon::Shader::ASTDoWhile& ast) { | 2394 | void operator()(const ASTDoWhile& ast) { |
| 2410 | ExprDecompiler expr_parser{decomp}; | 2395 | ExprDecompiler expr_parser{decomp}; |
| 2411 | std::visit(expr_parser, *ast.condition); | 2396 | std::visit(expr_parser, *ast.condition); |
| 2412 | decomp.code.AddLine("do {{"); | 2397 | decomp.code.AddLine("do {{"); |
| @@ -2420,7 +2405,7 @@ public: | |||
| 2420 | decomp.code.AddLine("}} while({});", expr_parser.GetResult()); | 2405 | decomp.code.AddLine("}} while({});", expr_parser.GetResult()); |
| 2421 | } | 2406 | } |
| 2422 | 2407 | ||
| 2423 | void operator()(VideoCommon::Shader::ASTReturn& ast) { | 2408 | void operator()(const ASTReturn& ast) { |
| 2424 | const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); | 2409 | const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); |
| 2425 | if (!is_true) { | 2410 | if (!is_true) { |
| 2426 | ExprDecompiler expr_parser{decomp}; | 2411 | ExprDecompiler expr_parser{decomp}; |
| @@ -2440,7 +2425,7 @@ public: | |||
| 2440 | } | 2425 | } |
| 2441 | } | 2426 | } |
| 2442 | 2427 | ||
| 2443 | void operator()(VideoCommon::Shader::ASTBreak& ast) { | 2428 | void operator()(const ASTBreak& ast) { |
| 2444 | const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); | 2429 | const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition); |
| 2445 | if (!is_true) { | 2430 | if (!is_true) { |
| 2446 | ExprDecompiler expr_parser{decomp}; | 2431 | ExprDecompiler expr_parser{decomp}; |
| @@ -2455,7 +2440,7 @@ public: | |||
| 2455 | } | 2440 | } |
| 2456 | } | 2441 | } |
| 2457 | 2442 | ||
| 2458 | void Visit(VideoCommon::Shader::ASTNode& node) { | 2443 | void Visit(const ASTNode& node) { |
| 2459 | std::visit(*this, *node->GetInnerData()); | 2444 | std::visit(*this, *node->GetInnerData()); |
| 2460 | } | 2445 | } |
| 2461 | 2446 | ||
| @@ -2468,32 +2453,53 @@ void GLSLDecompiler::DecompileAST() { | |||
| 2468 | for (u32 i = 0; i < num_flow_variables; i++) { | 2453 | for (u32 i = 0; i < num_flow_variables; i++) { |
| 2469 | code.AddLine("bool {} = false;", GetFlowVariable(i)); | 2454 | code.AddLine("bool {} = false;", GetFlowVariable(i)); |
| 2470 | } | 2455 | } |
| 2456 | |||
| 2471 | ASTDecompiler decompiler{*this}; | 2457 | ASTDecompiler decompiler{*this}; |
| 2472 | VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); | 2458 | decompiler.Visit(ir.GetASTProgram()); |
| 2473 | decompiler.Visit(program); | ||
| 2474 | } | 2459 | } |
| 2475 | 2460 | ||
| 2476 | } // Anonymous namespace | 2461 | } // Anonymous namespace |
| 2477 | 2462 | ||
| 2463 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | ||
| 2464 | ShaderEntries entries; | ||
| 2465 | for (const auto& cbuf : ir.GetConstantBuffers()) { | ||
| 2466 | entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | ||
| 2467 | cbuf.first); | ||
| 2468 | } | ||
| 2469 | for (const auto& sampler : ir.GetSamplers()) { | ||
| 2470 | entries.samplers.emplace_back(sampler); | ||
| 2471 | } | ||
| 2472 | for (const auto& [offset, image] : ir.GetImages()) { | ||
| 2473 | entries.images.emplace_back(image); | ||
| 2474 | } | ||
| 2475 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||
| 2476 | entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, | ||
| 2477 | usage.is_written); | ||
| 2478 | } | ||
| 2479 | entries.clip_distances = ir.GetClipDistances(); | ||
| 2480 | entries.shader_length = ir.GetLength(); | ||
| 2481 | return entries; | ||
| 2482 | } | ||
| 2483 | |||
| 2478 | std::string GetCommonDeclarations() { | 2484 | std::string GetCommonDeclarations() { |
| 2479 | return fmt::format( | 2485 | return R"(#define ftoi floatBitsToInt |
| 2480 | "#define ftoi floatBitsToInt\n" | 2486 | #define ftou floatBitsToUint |
| 2481 | "#define ftou floatBitsToUint\n" | 2487 | #define itof intBitsToFloat |
| 2482 | "#define itof intBitsToFloat\n" | 2488 | #define utof uintBitsToFloat |
| 2483 | "#define utof uintBitsToFloat\n\n" | 2489 | |
| 2484 | "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" | 2490 | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { |
| 2485 | " bvec2 is_nan1 = isnan(pair1);\n" | 2491 | bvec2 is_nan1 = isnan(pair1); |
| 2486 | " bvec2 is_nan2 = isnan(pair2);\n" | 2492 | bvec2 is_nan2 = isnan(pair2); |
| 2487 | " return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " | 2493 | return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); |
| 2488 | "is_nan2.y);\n" | 2494 | } |
| 2489 | "}}\n\n"); | 2495 | )"; |
| 2490 | } | 2496 | } |
| 2491 | 2497 | ||
| 2492 | ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, | 2498 | std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, |
| 2493 | const std::string& suffix) { | 2499 | const std::string& suffix) { |
| 2494 | GLSLDecompiler decompiler(device, ir, stage, suffix); | 2500 | GLSLDecompiler decompiler(device, ir, stage, suffix); |
| 2495 | decompiler.Decompile(); | 2501 | decompiler.Decompile(); |
| 2496 | return {decompiler.GetResult(), decompiler.GetShaderEntries()}; | 2502 | return decompiler.GetResult(); |
| 2497 | } | 2503 | } |
| 2498 | 2504 | ||
| 2499 | } // namespace OpenGL::GLShader | 2505 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index e538dc001..fead2a51e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -34,10 +34,7 @@ enum class ProgramType : u32 { | |||
| 34 | 34 | ||
| 35 | namespace OpenGL::GLShader { | 35 | namespace OpenGL::GLShader { |
| 36 | 36 | ||
| 37 | struct ShaderEntries; | ||
| 38 | |||
| 39 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 37 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 40 | using ProgramResult = std::pair<std::string, ShaderEntries>; | ||
| 41 | using SamplerEntry = VideoCommon::Shader::Sampler; | 38 | using SamplerEntry = VideoCommon::Shader::Sampler; |
| 42 | using ImageEntry = VideoCommon::Shader::Image; | 39 | using ImageEntry = VideoCommon::Shader::Image; |
| 43 | 40 | ||
| @@ -93,9 +90,11 @@ struct ShaderEntries { | |||
| 93 | std::size_t shader_length{}; | 90 | std::size_t shader_length{}; |
| 94 | }; | 91 | }; |
| 95 | 92 | ||
| 93 | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); | ||
| 94 | |||
| 96 | std::string GetCommonDeclarations(); | 95 | std::string GetCommonDeclarations(); |
| 97 | 96 | ||
| 98 | ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 97 | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
| 99 | ProgramType stage, const std::string& suffix); | 98 | ProgramType stage, const std::string& suffix); |
| 100 | 99 | ||
| 101 | } // namespace OpenGL::GLShader | 100 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 74cc33476..184a565e6 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -22,6 +22,29 @@ | |||
| 22 | 22 | ||
| 23 | namespace OpenGL { | 23 | namespace OpenGL { |
| 24 | 24 | ||
| 25 | using VideoCommon::Shader::BindlessSamplerMap; | ||
| 26 | using VideoCommon::Shader::BoundSamplerMap; | ||
| 27 | using VideoCommon::Shader::KeyMap; | ||
| 28 | |||
| 29 | namespace { | ||
| 30 | |||
| 31 | struct ConstBufferKey { | ||
| 32 | u32 cbuf; | ||
| 33 | u32 offset; | ||
| 34 | u32 value; | ||
| 35 | }; | ||
| 36 | |||
| 37 | struct BoundSamplerKey { | ||
| 38 | u32 offset; | ||
| 39 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 40 | }; | ||
| 41 | |||
| 42 | struct BindlessSamplerKey { | ||
| 43 | u32 cbuf; | ||
| 44 | u32 offset; | ||
| 45 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 46 | }; | ||
| 47 | |||
| 25 | using ShaderCacheVersionHash = std::array<u8, 64>; | 48 | using ShaderCacheVersionHash = std::array<u8, 64>; |
| 26 | 49 | ||
| 27 | enum class TransferableEntryKind : u32 { | 50 | enum class TransferableEntryKind : u32 { |
| @@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 { | |||
| 29 | Usage, | 52 | Usage, |
| 30 | }; | 53 | }; |
| 31 | 54 | ||
| 32 | enum class PrecompiledEntryKind : u32 { | 55 | constexpr u32 NativeVersion = 5; |
| 33 | Decompiled, | ||
| 34 | Dump, | ||
| 35 | }; | ||
| 36 | |||
| 37 | constexpr u32 NativeVersion = 4; | ||
| 38 | 56 | ||
| 39 | // Making sure sizes doesn't change by accident | 57 | // Making sure sizes doesn't change by accident |
| 40 | static_assert(sizeof(BaseBindings) == 16); | 58 | static_assert(sizeof(BaseBindings) == 16); |
| 41 | static_assert(sizeof(ShaderDiskCacheUsage) == 40); | ||
| 42 | |||
| 43 | namespace { | ||
| 44 | 59 | ||
| 45 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 60 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 46 | ShaderCacheVersionHash hash{}; | 61 | ShaderCacheVersionHash hash{}; |
| @@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 49 | return hash; | 64 | return hash; |
| 50 | } | 65 | } |
| 51 | 66 | ||
| 52 | } // namespace | 67 | } // Anonymous namespace |
| 53 | 68 | ||
| 54 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | 69 | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 55 | u32 program_code_size, u32 program_code_size_b, | ||
| 56 | ProgramCode program_code, ProgramCode program_code_b) | 70 | ProgramCode program_code, ProgramCode program_code_b) |
| 57 | : unique_identifier{unique_identifier}, program_type{program_type}, | 71 | : unique_identifier{unique_identifier}, program_type{program_type}, |
| 58 | program_code_size{program_code_size}, program_code_size_b{program_code_size_b}, | ||
| 59 | program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} | 72 | program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} |
| 60 | 73 | ||
| 61 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | 74 | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; |
| @@ -90,15 +103,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { | |||
| 90 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | 103 | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { |
| 91 | if (file.WriteObject(unique_identifier) != 1 || | 104 | if (file.WriteObject(unique_identifier) != 1 || |
| 92 | file.WriteObject(static_cast<u32>(program_type)) != 1 || | 105 | file.WriteObject(static_cast<u32>(program_type)) != 1 || |
| 93 | file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { | 106 | file.WriteObject(static_cast<u32>(program_code.size())) != 1 || |
| 107 | file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) { | ||
| 94 | return false; | 108 | return false; |
| 95 | } | 109 | } |
| 96 | 110 | ||
| 97 | if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) | 111 | if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) |
| 98 | return false; | 112 | return false; |
| 99 | 113 | ||
| 100 | if (HasProgramA() && | 114 | if (HasProgramA() && |
| 101 | file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { | 115 | file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { |
| 102 | return false; | 116 | return false; |
| 103 | } | 117 | } |
| 104 | return true; | 118 | return true; |
| @@ -127,13 +141,13 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 127 | u32 version{}; | 141 | u32 version{}; |
| 128 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { | 142 | if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { |
| 129 | LOG_ERROR(Render_OpenGL, | 143 | LOG_ERROR(Render_OpenGL, |
| 130 | "Failed to get transferable cache version for title id={} - skipping", | 144 | "Failed to get transferable cache version for title id={}, skipping", |
| 131 | GetTitleID()); | 145 | GetTitleID()); |
| 132 | return {}; | 146 | return {}; |
| 133 | } | 147 | } |
| 134 | 148 | ||
| 135 | if (version < NativeVersion) { | 149 | if (version < NativeVersion) { |
| 136 | LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); | 150 | LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); |
| 137 | file.Close(); | 151 | file.Close(); |
| 138 | InvalidateTransferable(); | 152 | InvalidateTransferable(); |
| 139 | is_usable = true; | 153 | is_usable = true; |
| @@ -141,17 +155,18 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 141 | } | 155 | } |
| 142 | if (version > NativeVersion) { | 156 | if (version > NativeVersion) { |
| 143 | LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " | 157 | LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " |
| 144 | "of the emulator - skipping"); | 158 | "of the emulator, skipping"); |
| 145 | return {}; | 159 | return {}; |
| 146 | } | 160 | } |
| 147 | 161 | ||
| 148 | // Version is valid, load the shaders | 162 | // Version is valid, load the shaders |
| 163 | constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; | ||
| 149 | std::vector<ShaderDiskCacheRaw> raws; | 164 | std::vector<ShaderDiskCacheRaw> raws; |
| 150 | std::vector<ShaderDiskCacheUsage> usages; | 165 | std::vector<ShaderDiskCacheUsage> usages; |
| 151 | while (file.Tell() < file.GetSize()) { | 166 | while (file.Tell() < file.GetSize()) { |
| 152 | TransferableEntryKind kind{}; | 167 | TransferableEntryKind kind{}; |
| 153 | if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { | 168 | if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { |
| 154 | LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); | 169 | LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); |
| 155 | return {}; | 170 | return {}; |
| 156 | } | 171 | } |
| 157 | 172 | ||
| @@ -159,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 159 | case TransferableEntryKind::Raw: { | 174 | case TransferableEntryKind::Raw: { |
| 160 | ShaderDiskCacheRaw entry; | 175 | ShaderDiskCacheRaw entry; |
| 161 | if (!entry.Load(file)) { | 176 | if (!entry.Load(file)) { |
| 162 | LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); | 177 | LOG_ERROR(Render_OpenGL, error_loading); |
| 163 | return {}; | 178 | return {}; |
| 164 | } | 179 | } |
| 165 | transferable.insert({entry.GetUniqueIdentifier(), {}}); | 180 | transferable.insert({entry.GetUniqueIdentifier(), {}}); |
| @@ -167,16 +182,45 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 167 | break; | 182 | break; |
| 168 | } | 183 | } |
| 169 | case TransferableEntryKind::Usage: { | 184 | case TransferableEntryKind::Usage: { |
| 170 | ShaderDiskCacheUsage usage{}; | 185 | ShaderDiskCacheUsage usage; |
| 171 | if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) { | 186 | |
| 172 | LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping"); | 187 | u32 num_keys{}; |
| 188 | u32 num_bound_samplers{}; | ||
| 189 | u32 num_bindless_samplers{}; | ||
| 190 | if (file.ReadArray(&usage.unique_identifier, 1) != 1 || | ||
| 191 | file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||
| 192 | file.ReadArray(&num_bound_samplers, 1) != 1 || | ||
| 193 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||
| 194 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 173 | return {}; | 195 | return {}; |
| 174 | } | 196 | } |
| 197 | |||
| 198 | std::vector<ConstBufferKey> keys(num_keys); | ||
| 199 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||
| 200 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||
| 201 | if (file.ReadArray(keys.data(), keys.size()) != keys.size() || | ||
| 202 | file.ReadArray(bound_samplers.data(), bound_samplers.size()) != | ||
| 203 | bound_samplers.size() || | ||
| 204 | file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != | ||
| 205 | bindless_samplers.size()) { | ||
| 206 | LOG_ERROR(Render_OpenGL, error_loading); | ||
| 207 | return {}; | ||
| 208 | } | ||
| 209 | for (const auto& key : keys) { | ||
| 210 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||
| 211 | } | ||
| 212 | for (const auto& key : bound_samplers) { | ||
| 213 | usage.bound_samplers.emplace(key.offset, key.sampler); | ||
| 214 | } | ||
| 215 | for (const auto& key : bindless_samplers) { | ||
| 216 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||
| 217 | } | ||
| 218 | |||
| 175 | usages.push_back(std::move(usage)); | 219 | usages.push_back(std::move(usage)); |
| 176 | break; | 220 | break; |
| 177 | } | 221 | } |
| 178 | default: | 222 | default: |
| 179 | LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", | 223 | LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", |
| 180 | static_cast<u32>(kind)); | 224 | static_cast<u32>(kind)); |
| 181 | return {}; | 225 | return {}; |
| 182 | } | 226 | } |
| @@ -186,13 +230,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 186 | return {{std::move(raws), std::move(usages)}}; | 230 | return {{std::move(raws), std::move(usages)}}; |
| 187 | } | 231 | } |
| 188 | 232 | ||
| 189 | std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> | 233 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> |
| 190 | ShaderDiskCacheOpenGL::LoadPrecompiled() { | 234 | ShaderDiskCacheOpenGL::LoadPrecompiled() { |
| 191 | if (!is_usable) { | 235 | if (!is_usable) { |
| 192 | return {}; | 236 | return {}; |
| 193 | } | 237 | } |
| 194 | 238 | ||
| 195 | FileUtil::IOFile file(GetPrecompiledPath(), "rb"); | 239 | std::string path = GetPrecompiledPath(); |
| 240 | FileUtil::IOFile file(path, "rb"); | ||
| 196 | if (!file.IsOpen()) { | 241 | if (!file.IsOpen()) { |
| 197 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", | 242 | LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", |
| 198 | GetTitleID()); | 243 | GetTitleID()); |
| @@ -202,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { | |||
| 202 | const auto result = LoadPrecompiledFile(file); | 247 | const auto result = LoadPrecompiledFile(file); |
| 203 | if (!result) { | 248 | if (!result) { |
| 204 | LOG_INFO(Render_OpenGL, | 249 | LOG_INFO(Render_OpenGL, |
| 205 | "Failed to load precompiled cache for game with title id={} - removing", | 250 | "Failed to load precompiled cache for game with title id={}, removing", |
| 206 | GetTitleID()); | 251 | GetTitleID()); |
| 207 | file.Close(); | 252 | file.Close(); |
| 208 | InvalidatePrecompiled(); | 253 | InvalidatePrecompiled(); |
| @@ -211,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { | |||
| 211 | return *result; | 256 | return *result; |
| 212 | } | 257 | } |
| 213 | 258 | ||
| 214 | std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> | 259 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> |
| 215 | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | 260 | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { |
| 216 | // Read compressed file from disk and decompress to virtual precompiled cache file | 261 | // Read compressed file from disk and decompress to virtual precompiled cache file |
| 217 | std::vector<u8> compressed(file.GetSize()); | 262 | std::vector<u8> compressed(file.GetSize()); |
| @@ -231,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
| 231 | return {}; | 276 | return {}; |
| 232 | } | 277 | } |
| 233 | 278 | ||
| 234 | std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; | ||
| 235 | ShaderDumpsMap dumps; | 279 | ShaderDumpsMap dumps; |
| 236 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | 280 | while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { |
| 237 | PrecompiledEntryKind kind{}; | 281 | u32 num_keys{}; |
| 238 | if (!LoadObjectFromPrecompiled(kind)) { | 282 | u32 num_bound_samplers{}; |
| 283 | u32 num_bindless_samplers{}; | ||
| 284 | ShaderDiskCacheUsage usage; | ||
| 285 | if (!LoadObjectFromPrecompiled(usage.unique_identifier) || | ||
| 286 | !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || | ||
| 287 | !LoadObjectFromPrecompiled(num_bound_samplers) || | ||
| 288 | !LoadObjectFromPrecompiled(num_bindless_samplers)) { | ||
| 239 | return {}; | 289 | return {}; |
| 240 | } | 290 | } |
| 241 | 291 | std::vector<ConstBufferKey> keys(num_keys); | |
| 242 | switch (kind) { | 292 | std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); |
| 243 | case PrecompiledEntryKind::Decompiled: { | 293 | std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); |
| 244 | u64 unique_identifier{}; | 294 | if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || |
| 245 | if (!LoadObjectFromPrecompiled(unique_identifier)) { | 295 | !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != |
| 246 | return {}; | 296 | bound_samplers.size() || |
| 247 | } | 297 | !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != |
| 248 | 298 | bindless_samplers.size()) { | |
| 249 | auto entry = LoadDecompiledEntry(); | ||
| 250 | if (!entry) { | ||
| 251 | return {}; | ||
| 252 | } | ||
| 253 | decompiled.insert({unique_identifier, std::move(*entry)}); | ||
| 254 | break; | ||
| 255 | } | ||
| 256 | case PrecompiledEntryKind::Dump: { | ||
| 257 | ShaderDiskCacheUsage usage; | ||
| 258 | if (!LoadObjectFromPrecompiled(usage)) { | ||
| 259 | return {}; | ||
| 260 | } | ||
| 261 | |||
| 262 | ShaderDiskCacheDump dump; | ||
| 263 | if (!LoadObjectFromPrecompiled(dump.binary_format)) { | ||
| 264 | return {}; | ||
| 265 | } | ||
| 266 | |||
| 267 | u32 binary_length{}; | ||
| 268 | if (!LoadObjectFromPrecompiled(binary_length)) { | ||
| 269 | return {}; | ||
| 270 | } | ||
| 271 | |||
| 272 | dump.binary.resize(binary_length); | ||
| 273 | if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { | ||
| 274 | return {}; | ||
| 275 | } | ||
| 276 | |||
| 277 | dumps.insert({usage, dump}); | ||
| 278 | break; | ||
| 279 | } | ||
| 280 | default: | ||
| 281 | return {}; | 299 | return {}; |
| 282 | } | 300 | } |
| 283 | } | 301 | for (const auto& key : keys) { |
| 284 | return {{decompiled, dumps}}; | 302 | usage.keys.insert({{key.cbuf, key.offset}, key.value}); |
| 285 | } | ||
| 286 | |||
| 287 | std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() { | ||
| 288 | u32 code_size{}; | ||
| 289 | if (!LoadObjectFromPrecompiled(code_size)) { | ||
| 290 | return {}; | ||
| 291 | } | ||
| 292 | |||
| 293 | std::string code(code_size, '\0'); | ||
| 294 | if (!LoadArrayFromPrecompiled(code.data(), code.size())) { | ||
| 295 | return {}; | ||
| 296 | } | ||
| 297 | |||
| 298 | ShaderDiskCacheDecompiled entry; | ||
| 299 | entry.code = std::move(code); | ||
| 300 | |||
| 301 | u32 const_buffers_count{}; | ||
| 302 | if (!LoadObjectFromPrecompiled(const_buffers_count)) { | ||
| 303 | return {}; | ||
| 304 | } | ||
| 305 | |||
| 306 | for (u32 i = 0; i < const_buffers_count; ++i) { | ||
| 307 | u32 max_offset{}; | ||
| 308 | u32 index{}; | ||
| 309 | bool is_indirect{}; | ||
| 310 | if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || | ||
| 311 | !LoadObjectFromPrecompiled(is_indirect)) { | ||
| 312 | return {}; | ||
| 313 | } | 303 | } |
| 314 | entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); | 304 | for (const auto& key : bound_samplers) { |
| 315 | } | 305 | usage.bound_samplers.emplace(key.offset, key.sampler); |
| 316 | |||
| 317 | u32 samplers_count{}; | ||
| 318 | if (!LoadObjectFromPrecompiled(samplers_count)) { | ||
| 319 | return {}; | ||
| 320 | } | ||
| 321 | |||
| 322 | for (u32 i = 0; i < samplers_count; ++i) { | ||
| 323 | u64 offset{}; | ||
| 324 | u64 index{}; | ||
| 325 | u32 type{}; | ||
| 326 | bool is_array{}; | ||
| 327 | bool is_shadow{}; | ||
| 328 | bool is_bindless{}; | ||
| 329 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || | ||
| 330 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || | ||
| 331 | !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { | ||
| 332 | return {}; | ||
| 333 | } | 306 | } |
| 334 | entry.entries.samplers.emplace_back( | 307 | for (const auto& key : bindless_samplers) { |
| 335 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), | 308 | usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); |
| 336 | static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); | ||
| 337 | } | ||
| 338 | |||
| 339 | u32 images_count{}; | ||
| 340 | if (!LoadObjectFromPrecompiled(images_count)) { | ||
| 341 | return {}; | ||
| 342 | } | ||
| 343 | for (u32 i = 0; i < images_count; ++i) { | ||
| 344 | u64 offset{}; | ||
| 345 | u64 index{}; | ||
| 346 | u32 type{}; | ||
| 347 | u8 is_bindless{}; | ||
| 348 | u8 is_written{}; | ||
| 349 | u8 is_read{}; | ||
| 350 | u8 is_atomic{}; | ||
| 351 | if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || | ||
| 352 | !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || | ||
| 353 | !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || | ||
| 354 | !LoadObjectFromPrecompiled(is_atomic)) { | ||
| 355 | return {}; | ||
| 356 | } | 309 | } |
| 357 | entry.entries.images.emplace_back( | ||
| 358 | static_cast<std::size_t>(offset), static_cast<std::size_t>(index), | ||
| 359 | static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, | ||
| 360 | is_read != 0, is_atomic != 0); | ||
| 361 | } | ||
| 362 | 310 | ||
| 363 | u32 global_memory_count{}; | 311 | ShaderDiskCacheDump dump; |
| 364 | if (!LoadObjectFromPrecompiled(global_memory_count)) { | 312 | if (!LoadObjectFromPrecompiled(dump.binary_format)) { |
| 365 | return {}; | ||
| 366 | } | ||
| 367 | for (u32 i = 0; i < global_memory_count; ++i) { | ||
| 368 | u32 cbuf_index{}; | ||
| 369 | u32 cbuf_offset{}; | ||
| 370 | bool is_read{}; | ||
| 371 | bool is_written{}; | ||
| 372 | if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || | ||
| 373 | !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { | ||
| 374 | return {}; | 313 | return {}; |
| 375 | } | 314 | } |
| 376 | entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, | ||
| 377 | is_written); | ||
| 378 | } | ||
| 379 | 315 | ||
| 380 | for (auto& clip_distance : entry.entries.clip_distances) { | 316 | u32 binary_length{}; |
| 381 | if (!LoadObjectFromPrecompiled(clip_distance)) { | 317 | if (!LoadObjectFromPrecompiled(binary_length)) { |
| 382 | return {}; | 318 | return {}; |
| 383 | } | 319 | } |
| 384 | } | ||
| 385 | |||
| 386 | u64 shader_length{}; | ||
| 387 | if (!LoadObjectFromPrecompiled(shader_length)) { | ||
| 388 | return {}; | ||
| 389 | } | ||
| 390 | entry.entries.shader_length = static_cast<std::size_t>(shader_length); | ||
| 391 | |||
| 392 | return entry; | ||
| 393 | } | ||
| 394 | |||
| 395 | bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code, | ||
| 396 | const GLShader::ShaderEntries& entries) { | ||
| 397 | if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) || | ||
| 398 | !SaveObjectToPrecompiled(unique_identifier) || | ||
| 399 | !SaveObjectToPrecompiled(static_cast<u32>(code.size())) || | ||
| 400 | !SaveArrayToPrecompiled(code.data(), code.size())) { | ||
| 401 | return false; | ||
| 402 | } | ||
| 403 | |||
| 404 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) { | ||
| 405 | return false; | ||
| 406 | } | ||
| 407 | for (const auto& cbuf : entries.const_buffers) { | ||
| 408 | if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || | ||
| 409 | !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || | ||
| 410 | !SaveObjectToPrecompiled(cbuf.IsIndirect())) { | ||
| 411 | return false; | ||
| 412 | } | ||
| 413 | } | ||
| 414 | |||
| 415 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) { | ||
| 416 | return false; | ||
| 417 | } | ||
| 418 | for (const auto& sampler : entries.samplers) { | ||
| 419 | if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || | ||
| 420 | !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || | ||
| 421 | !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || | ||
| 422 | !SaveObjectToPrecompiled(sampler.IsArray()) || | ||
| 423 | !SaveObjectToPrecompiled(sampler.IsShadow()) || | ||
| 424 | !SaveObjectToPrecompiled(sampler.IsBindless())) { | ||
| 425 | return false; | ||
| 426 | } | ||
| 427 | } | ||
| 428 | |||
| 429 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) { | ||
| 430 | return false; | ||
| 431 | } | ||
| 432 | for (const auto& image : entries.images) { | ||
| 433 | if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || | ||
| 434 | !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || | ||
| 435 | !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || | ||
| 436 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || | ||
| 437 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || | ||
| 438 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || | ||
| 439 | !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) { | ||
| 440 | return false; | ||
| 441 | } | ||
| 442 | } | ||
| 443 | 320 | ||
| 444 | if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { | 321 | dump.binary.resize(binary_length); |
| 445 | return false; | 322 | if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { |
| 446 | } | 323 | return {}; |
| 447 | for (const auto& gmem : entries.global_memory_entries) { | ||
| 448 | if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || | ||
| 449 | !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || | ||
| 450 | !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { | ||
| 451 | return false; | ||
| 452 | } | ||
| 453 | } | ||
| 454 | |||
| 455 | for (const bool clip_distance : entries.clip_distances) { | ||
| 456 | if (!SaveObjectToPrecompiled(clip_distance)) { | ||
| 457 | return false; | ||
| 458 | } | 324 | } |
| 459 | } | ||
| 460 | 325 | ||
| 461 | if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { | 326 | dumps.emplace(std::move(usage), dump); |
| 462 | return false; | ||
| 463 | } | 327 | } |
| 464 | 328 | return dumps; | |
| 465 | return true; | ||
| 466 | } | 329 | } |
| 467 | 330 | ||
| 468 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | 331 | void ShaderDiskCacheOpenGL::InvalidateTransferable() { |
| @@ -494,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | |||
| 494 | } | 357 | } |
| 495 | 358 | ||
| 496 | FileUtil::IOFile file = AppendTransferableFile(); | 359 | FileUtil::IOFile file = AppendTransferableFile(); |
| 497 | if (!file.IsOpen()) | 360 | if (!file.IsOpen()) { |
| 498 | return; | 361 | return; |
| 362 | } | ||
| 499 | if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { | 363 | if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { |
| 500 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); | 364 | LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); |
| 501 | file.Close(); | 365 | file.Close(); |
| 502 | InvalidateTransferable(); | 366 | InvalidateTransferable(); |
| 503 | return; | 367 | return; |
| @@ -523,29 +387,39 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | |||
| 523 | FileUtil::IOFile file = AppendTransferableFile(); | 387 | FileUtil::IOFile file = AppendTransferableFile(); |
| 524 | if (!file.IsOpen()) | 388 | if (!file.IsOpen()) |
| 525 | return; | 389 | return; |
| 526 | 390 | const auto Close = [&] { | |
| 527 | if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) { | 391 | LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); |
| 528 | LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing"); | ||
| 529 | file.Close(); | 392 | file.Close(); |
| 530 | InvalidateTransferable(); | 393 | InvalidateTransferable(); |
| 531 | return; | 394 | }; |
| 532 | } | ||
| 533 | } | ||
| 534 | 395 | ||
| 535 | void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, | 396 | if (file.WriteObject(TransferableEntryKind::Usage) != 1 || |
| 536 | const GLShader::ShaderEntries& entries) { | 397 | file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || |
| 537 | if (!is_usable) { | 398 | file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || |
| 399 | file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || | ||
| 400 | file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { | ||
| 401 | Close(); | ||
| 538 | return; | 402 | return; |
| 539 | } | 403 | } |
| 540 | 404 | for (const auto& [pair, value] : usage.keys) { | |
| 541 | if (precompiled_cache_virtual_file.GetSize() == 0) { | 405 | const auto [cbuf, offset] = pair; |
| 542 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | 406 | if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { |
| 407 | Close(); | ||
| 408 | return; | ||
| 409 | } | ||
| 543 | } | 410 | } |
| 544 | 411 | for (const auto& [offset, sampler] : usage.bound_samplers) { | |
| 545 | if (!SaveDecompiledFile(unique_identifier, code, entries)) { | 412 | if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { |
| 546 | LOG_ERROR(Render_OpenGL, | 413 | Close(); |
| 547 | "Failed to save decompiled entry to the precompiled file - removing"); | 414 | return; |
| 548 | InvalidatePrecompiled(); | 415 | } |
| 416 | } | ||
| 417 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 418 | const auto [cbuf, offset] = pair; | ||
| 419 | if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 420 | Close(); | ||
| 421 | return; | ||
| 422 | } | ||
| 549 | } | 423 | } |
| 550 | } | 424 | } |
| 551 | 425 | ||
| @@ -554,6 +428,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 554 | return; | 428 | return; |
| 555 | } | 429 | } |
| 556 | 430 | ||
| 431 | // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header | ||
| 432 | // when writing the dump. This should be done the moment I get access to write to the virtual | ||
| 433 | // file. | ||
| 434 | if (precompiled_cache_virtual_file.GetSize() == 0) { | ||
| 435 | SavePrecompiledHeaderToVirtualPrecompiledCache(); | ||
| 436 | } | ||
| 437 | |||
| 557 | GLint binary_length{}; | 438 | GLint binary_length{}; |
| 558 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | 439 | glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); |
| 559 | 440 | ||
| @@ -561,21 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 561 | std::vector<u8> binary(binary_length); | 442 | std::vector<u8> binary(binary_length); |
| 562 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | 443 | glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |
| 563 | 444 | ||
| 564 | if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || | 445 | const auto Close = [&] { |
| 565 | !SaveObjectToPrecompiled(usage) || | 446 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", |
| 566 | !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || | ||
| 567 | !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || | ||
| 568 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 569 | LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", | ||
| 570 | usage.unique_identifier); | 447 | usage.unique_identifier); |
| 571 | InvalidatePrecompiled(); | 448 | InvalidatePrecompiled(); |
| 449 | }; | ||
| 450 | |||
| 451 | if (!SaveObjectToPrecompiled(usage.unique_identifier) || | ||
| 452 | !SaveObjectToPrecompiled(usage.variant) || | ||
| 453 | !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || | ||
| 454 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || | ||
| 455 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { | ||
| 456 | Close(); | ||
| 572 | return; | 457 | return; |
| 573 | } | 458 | } |
| 459 | for (const auto& [pair, value] : usage.keys) { | ||
| 460 | const auto [cbuf, offset] = pair; | ||
| 461 | if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { | ||
| 462 | Close(); | ||
| 463 | return; | ||
| 464 | } | ||
| 465 | } | ||
| 466 | for (const auto& [offset, sampler] : usage.bound_samplers) { | ||
| 467 | if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { | ||
| 468 | Close(); | ||
| 469 | return; | ||
| 470 | } | ||
| 471 | } | ||
| 472 | for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||
| 473 | const auto [cbuf, offset] = pair; | ||
| 474 | if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||
| 475 | Close(); | ||
| 476 | return; | ||
| 477 | } | ||
| 478 | } | ||
| 479 | if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || | ||
| 480 | !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || | ||
| 481 | !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||
| 482 | Close(); | ||
| 483 | } | ||
| 574 | } | 484 | } |
| 575 | 485 | ||
| 576 | FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { | 486 | FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { |
| 577 | if (!EnsureDirectories()) | 487 | if (!EnsureDirectories()) { |
| 578 | return {}; | 488 | return {}; |
| 489 | } | ||
| 579 | 490 | ||
| 580 | const auto transferable_path{GetTransferablePath()}; | 491 | const auto transferable_path{GetTransferablePath()}; |
| 581 | const bool existed = FileUtil::Exists(transferable_path); | 492 | const bool existed = FileUtil::Exists(transferable_path); |
| @@ -607,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { | |||
| 607 | 518 | ||
| 608 | void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | 519 | void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { |
| 609 | precompiled_cache_virtual_file_offset = 0; | 520 | precompiled_cache_virtual_file_offset = 0; |
| 610 | const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); | 521 | const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); |
| 611 | const std::vector<u8>& compressed = | 522 | const std::vector<u8> compressed = |
| 612 | Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); | 523 | Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); |
| 613 | 524 | ||
| 614 | const auto precompiled_path{GetPrecompiledPath()}; | 525 | const auto precompiled_path{GetPrecompiledPath()}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 9595bd71b..db23ada93 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <string> | 9 | #include <string> |
| 10 | #include <tuple> | 10 | #include <tuple> |
| 11 | #include <type_traits> | ||
| 11 | #include <unordered_map> | 12 | #include <unordered_map> |
| 12 | #include <unordered_set> | 13 | #include <unordered_set> |
| 13 | #include <utility> | 14 | #include <utility> |
| @@ -19,6 +20,7 @@ | |||
| 19 | #include "common/common_types.h" | 20 | #include "common/common_types.h" |
| 20 | #include "core/file_sys/vfs_vector.h" | 21 | #include "core/file_sys/vfs_vector.h" |
| 21 | #include "video_core/renderer_opengl/gl_shader_gen.h" | 22 | #include "video_core/renderer_opengl/gl_shader_gen.h" |
| 23 | #include "video_core/shader/const_buffer_locker.h" | ||
| 22 | 24 | ||
| 23 | namespace Core { | 25 | namespace Core { |
| 24 | class System; | 26 | class System; |
| @@ -53,6 +55,7 @@ struct BaseBindings { | |||
| 53 | return !operator==(rhs); | 55 | return !operator==(rhs); |
| 54 | } | 56 | } |
| 55 | }; | 57 | }; |
| 58 | static_assert(std::is_trivially_copyable_v<BaseBindings>); | ||
| 56 | 59 | ||
| 57 | /// Describes the different variants a single program can be compiled. | 60 | /// Describes the different variants a single program can be compiled. |
| 58 | struct ProgramVariant { | 61 | struct ProgramVariant { |
| @@ -70,13 +73,20 @@ struct ProgramVariant { | |||
| 70 | } | 73 | } |
| 71 | }; | 74 | }; |
| 72 | 75 | ||
| 76 | static_assert(std::is_trivially_copyable_v<ProgramVariant>); | ||
| 77 | |||
| 73 | /// Describes how a shader is used. | 78 | /// Describes how a shader is used. |
| 74 | struct ShaderDiskCacheUsage { | 79 | struct ShaderDiskCacheUsage { |
| 75 | u64 unique_identifier{}; | 80 | u64 unique_identifier{}; |
| 76 | ProgramVariant variant; | 81 | ProgramVariant variant; |
| 82 | VideoCommon::Shader::KeyMap keys; | ||
| 83 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||
| 84 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||
| 77 | 85 | ||
| 78 | bool operator==(const ShaderDiskCacheUsage& rhs) const { | 86 | bool operator==(const ShaderDiskCacheUsage& rhs) const { |
| 79 | return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); | 87 | return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == |
| 88 | std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, | ||
| 89 | rhs.bindless_samplers); | ||
| 80 | } | 90 | } |
| 81 | 91 | ||
| 82 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { | 92 | bool operator!=(const ShaderDiskCacheUsage& rhs) const { |
| @@ -123,8 +133,7 @@ namespace OpenGL { | |||
| 123 | class ShaderDiskCacheRaw { | 133 | class ShaderDiskCacheRaw { |
| 124 | public: | 134 | public: |
| 125 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | 135 | explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |
| 126 | u32 program_code_size, u32 program_code_size_b, | 136 | ProgramCode program_code, ProgramCode program_code_b = {}); |
| 127 | ProgramCode program_code, ProgramCode program_code_b); | ||
| 128 | ShaderDiskCacheRaw(); | 137 | ShaderDiskCacheRaw(); |
| 129 | ~ShaderDiskCacheRaw(); | 138 | ~ShaderDiskCacheRaw(); |
| 130 | 139 | ||
| @@ -155,22 +164,14 @@ public: | |||
| 155 | private: | 164 | private: |
| 156 | u64 unique_identifier{}; | 165 | u64 unique_identifier{}; |
| 157 | ProgramType program_type{}; | 166 | ProgramType program_type{}; |
| 158 | u32 program_code_size{}; | ||
| 159 | u32 program_code_size_b{}; | ||
| 160 | 167 | ||
| 161 | ProgramCode program_code; | 168 | ProgramCode program_code; |
| 162 | ProgramCode program_code_b; | 169 | ProgramCode program_code_b; |
| 163 | }; | 170 | }; |
| 164 | 171 | ||
| 165 | /// Contains decompiled data from a shader | ||
| 166 | struct ShaderDiskCacheDecompiled { | ||
| 167 | std::string code; | ||
| 168 | GLShader::ShaderEntries entries; | ||
| 169 | }; | ||
| 170 | |||
| 171 | /// Contains an OpenGL dumped binary program | 172 | /// Contains an OpenGL dumped binary program |
| 172 | struct ShaderDiskCacheDump { | 173 | struct ShaderDiskCacheDump { |
| 173 | GLenum binary_format; | 174 | GLenum binary_format{}; |
| 174 | std::vector<u8> binary; | 175 | std::vector<u8> binary; |
| 175 | }; | 176 | }; |
| 176 | 177 | ||
| @@ -184,9 +185,7 @@ public: | |||
| 184 | LoadTransferable(); | 185 | LoadTransferable(); |
| 185 | 186 | ||
| 186 | /// Loads current game's precompiled cache. Invalidates on failure. | 187 | /// Loads current game's precompiled cache. Invalidates on failure. |
| 187 | std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, | 188 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); |
| 188 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | ||
| 189 | LoadPrecompiled(); | ||
| 190 | 189 | ||
| 191 | /// Removes the transferable (and precompiled) cache file. | 190 | /// Removes the transferable (and precompiled) cache file. |
| 192 | void InvalidateTransferable(); | 191 | void InvalidateTransferable(); |
| @@ -200,10 +199,6 @@ public: | |||
| 200 | /// Saves shader usage to the transferable file. Does not check for collisions. | 199 | /// Saves shader usage to the transferable file. Does not check for collisions. |
| 201 | void SaveUsage(const ShaderDiskCacheUsage& usage); | 200 | void SaveUsage(const ShaderDiskCacheUsage& usage); |
| 202 | 201 | ||
| 203 | /// Saves a decompiled entry to the precompiled file. Does not check for collisions. | ||
| 204 | void SaveDecompiled(u64 unique_identifier, const std::string& code, | ||
| 205 | const GLShader::ShaderEntries& entries); | ||
| 206 | |||
| 207 | /// Saves a dump entry to the precompiled file. Does not check for collisions. | 202 | /// Saves a dump entry to the precompiled file. Does not check for collisions. |
| 208 | void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); | 203 | void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); |
| 209 | 204 | ||
| @@ -212,18 +207,9 @@ public: | |||
| 212 | 207 | ||
| 213 | private: | 208 | private: |
| 214 | /// Loads the transferable cache. Returns empty on failure. | 209 | /// Loads the transferable cache. Returns empty on failure. |
| 215 | std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, | 210 | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> |
| 216 | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> | ||
| 217 | LoadPrecompiledFile(FileUtil::IOFile& file); | 211 | LoadPrecompiledFile(FileUtil::IOFile& file); |
| 218 | 212 | ||
| 219 | /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on | ||
| 220 | /// failure. | ||
| 221 | std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(); | ||
| 222 | |||
| 223 | /// Saves a decompiled entry to the passed file. Returns true on success. | ||
| 224 | bool SaveDecompiledFile(u64 unique_identifier, const std::string& code, | ||
| 225 | const GLShader::ShaderEntries& entries); | ||
| 226 | |||
| 227 | /// Opens current game's transferable file and write it's header if it doesn't exist | 213 | /// Opens current game's transferable file and write it's header if it doesn't exist |
| 228 | FileUtil::IOFile AppendTransferableFile() const; | 214 | FileUtil::IOFile AppendTransferableFile() const; |
| 229 | 215 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index b5a43e79e..0e22eede9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp | |||
| @@ -16,17 +16,8 @@ using VideoCommon::Shader::CompilerSettings; | |||
| 16 | using VideoCommon::Shader::ProgramCode; | 16 | using VideoCommon::Shader::ProgramCode; |
| 17 | using VideoCommon::Shader::ShaderIR; | 17 | using VideoCommon::Shader::ShaderIR; |
| 18 | 18 | ||
| 19 | static constexpr u32 PROGRAM_OFFSET = 10; | 19 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { |
| 20 | static constexpr u32 COMPUTE_OFFSET = 0; | 20 | std::string out = GetCommonDeclarations(); |
| 21 | |||
| 22 | static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true}; | ||
| 23 | |||
| 24 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { | ||
| 25 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||
| 26 | |||
| 27 | std::string out = "// Shader Unique Id: VS" + id + "\n\n"; | ||
| 28 | out += GetCommonDeclarations(); | ||
| 29 | |||
| 30 | out += R"( | 21 | out += R"( |
| 31 | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | 22 | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { |
| 32 | vec4 viewport_flip; | 23 | vec4 viewport_flip; |
| @@ -34,17 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| 34 | }; | 25 | }; |
| 35 | 26 | ||
| 36 | )"; | 27 | )"; |
| 37 | 28 | const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; | |
| 38 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); | 29 | out += Decompile(device, ir, stage, "vertex"); |
| 39 | const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | 30 | if (ir_b) { |
| 40 | ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | 31 | out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); |
| 41 | out += program.first; | ||
| 42 | |||
| 43 | if (setup.IsDualProgram()) { | ||
| 44 | const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, | ||
| 45 | settings); | ||
| 46 | ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); | ||
| 47 | out += program_b.first; | ||
| 48 | } | 32 | } |
| 49 | 33 | ||
| 50 | out += R"( | 34 | out += R"( |
| @@ -52,7 +36,7 @@ void main() { | |||
| 52 | execute_vertex(); | 36 | execute_vertex(); |
| 53 | )"; | 37 | )"; |
| 54 | 38 | ||
| 55 | if (setup.IsDualProgram()) { | 39 | if (ir_b) { |
| 56 | out += " execute_vertex_b();"; | 40 | out += " execute_vertex_b();"; |
| 57 | } | 41 | } |
| 58 | 42 | ||
| @@ -66,17 +50,13 @@ void main() { | |||
| 66 | // Viewport can be flipped, which is unsupported by glViewport | 50 | // Viewport can be flipped, which is unsupported by glViewport |
| 67 | gl_Position.xy *= viewport_flip.xy; | 51 | gl_Position.xy *= viewport_flip.xy; |
| 68 | } | 52 | } |
| 69 | })"; | 53 | } |
| 70 | 54 | )"; | |
| 71 | return {std::move(out), std::move(program.second)}; | 55 | return out; |
| 72 | } | 56 | } |
| 73 | 57 | ||
| 74 | ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { | 58 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { |
| 75 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 59 | std::string out = GetCommonDeclarations(); |
| 76 | |||
| 77 | std::string out = "// Shader Unique Id: GS" + id + "\n\n"; | ||
| 78 | out += GetCommonDeclarations(); | ||
| 79 | |||
| 80 | out += R"( | 60 | out += R"( |
| 81 | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | 61 | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { |
| 82 | vec4 viewport_flip; | 62 | vec4 viewport_flip; |
| @@ -84,25 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| 84 | }; | 64 | }; |
| 85 | 65 | ||
| 86 | )"; | 66 | )"; |
| 87 | 67 | out += Decompile(device, ir, ProgramType::Geometry, "geometry"); | |
| 88 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); | ||
| 89 | ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); | ||
| 90 | out += program.first; | ||
| 91 | 68 | ||
| 92 | out += R"( | 69 | out += R"( |
| 93 | void main() { | 70 | void main() { |
| 94 | execute_geometry(); | 71 | execute_geometry(); |
| 95 | };)"; | 72 | } |
| 96 | 73 | )"; | |
| 97 | return {std::move(out), std::move(program.second)}; | 74 | return out; |
| 98 | } | 75 | } |
| 99 | 76 | ||
| 100 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { | 77 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { |
| 101 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 78 | std::string out = GetCommonDeclarations(); |
| 102 | |||
| 103 | std::string out = "// Shader Unique Id: FS" + id + "\n\n"; | ||
| 104 | out += GetCommonDeclarations(); | ||
| 105 | |||
| 106 | out += R"( | 79 | out += R"( |
| 107 | layout (location = 0) out vec4 FragColor0; | 80 | layout (location = 0) out vec4 FragColor0; |
| 108 | layout (location = 1) out vec4 FragColor1; | 81 | layout (location = 1) out vec4 FragColor1; |
| @@ -119,36 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 119 | }; | 92 | }; |
| 120 | 93 | ||
| 121 | )"; | 94 | )"; |
| 122 | 95 | out += Decompile(device, ir, ProgramType::Fragment, "fragment"); | |
| 123 | const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); | ||
| 124 | ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); | ||
| 125 | out += program.first; | ||
| 126 | 96 | ||
| 127 | out += R"( | 97 | out += R"( |
| 128 | void main() { | 98 | void main() { |
| 129 | execute_fragment(); | 99 | execute_fragment(); |
| 130 | } | 100 | } |
| 131 | |||
| 132 | )"; | 101 | )"; |
| 133 | return {std::move(out), std::move(program.second)}; | 102 | return out; |
| 134 | } | 103 | } |
| 135 | 104 | ||
| 136 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | 105 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { |
| 137 | const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | 106 | std::string out = GetCommonDeclarations(); |
| 138 | 107 | out += Decompile(device, ir, ProgramType::Compute, "compute"); | |
| 139 | std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||
| 140 | out += GetCommonDeclarations(); | ||
| 141 | |||
| 142 | const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings); | ||
| 143 | ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||
| 144 | out += program.first; | ||
| 145 | |||
| 146 | out += R"( | 108 | out += R"( |
| 147 | void main() { | 109 | void main() { |
| 148 | execute_compute(); | 110 | execute_compute(); |
| 149 | } | 111 | } |
| 150 | )"; | 112 | )"; |
| 151 | return {std::move(out), std::move(program.second)}; | 113 | return out; |
| 152 | } | 114 | } |
| 153 | 115 | ||
| 154 | } // namespace OpenGL::GLShader | 116 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3833e88ab..cba2be9f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h | |||
| @@ -17,44 +17,18 @@ class Device; | |||
| 17 | namespace OpenGL::GLShader { | 17 | namespace OpenGL::GLShader { |
| 18 | 18 | ||
| 19 | using VideoCommon::Shader::ProgramCode; | 19 | using VideoCommon::Shader::ProgramCode; |
| 20 | 20 | using VideoCommon::Shader::ShaderIR; | |
| 21 | struct ShaderSetup { | ||
| 22 | explicit ShaderSetup(ProgramCode program_code) { | ||
| 23 | program.code = std::move(program_code); | ||
| 24 | } | ||
| 25 | |||
| 26 | struct { | ||
| 27 | ProgramCode code; | ||
| 28 | ProgramCode code_b; // Used for dual vertex shaders | ||
| 29 | u64 unique_identifier; | ||
| 30 | std::size_t size_a; | ||
| 31 | std::size_t size_b; | ||
| 32 | } program; | ||
| 33 | |||
| 34 | /// Used in scenarios where we have a dual vertex shaders | ||
| 35 | void SetProgramB(ProgramCode program_b) { | ||
| 36 | program.code_b = std::move(program_b); | ||
| 37 | has_program_b = true; | ||
| 38 | } | ||
| 39 | |||
| 40 | bool IsDualProgram() const { | ||
| 41 | return has_program_b; | ||
| 42 | } | ||
| 43 | |||
| 44 | private: | ||
| 45 | bool has_program_b{}; | ||
| 46 | }; | ||
| 47 | 21 | ||
| 48 | /// Generates the GLSL vertex shader program source code for the given VS program | 22 | /// Generates the GLSL vertex shader program source code for the given VS program |
| 49 | ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); | 23 | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); |
| 50 | 24 | ||
| 51 | /// Generates the GLSL geometry shader program source code for the given GS program | 25 | /// Generates the GLSL geometry shader program source code for the given GS program |
| 52 | ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); | 26 | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); |
| 53 | 27 | ||
| 54 | /// Generates the GLSL fragment shader program source code for the given FS program | 28 | /// Generates the GLSL fragment shader program source code for the given FS program |
| 55 | ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | 29 | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); |
| 56 | 30 | ||
| 57 | /// Generates the GLSL compute shader program source code for the given CS program | 31 | /// Generates the GLSL compute shader program source code for the given CS program |
| 58 | ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | 32 | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); |
| 59 | 33 | ||
| 60 | } // namespace OpenGL::GLShader | 34 | } // namespace OpenGL::GLShader |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 173b76c4e..2f9bfd7e4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -111,7 +111,8 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 111 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | 111 | {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, |
| 112 | true}, // DXT45_SRGB | 112 | true}, // DXT45_SRGB |
| 113 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, | 113 | {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, |
| 114 | true}, // BC7U_SRGB | 114 | true}, // BC7U_SRGB |
| 115 | {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, ComponentType::UNorm, false}, // R4G4B4A4U | ||
| 115 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB | 116 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB |
| 116 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB | 117 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB |
| 117 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB | 118 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB |
| @@ -120,6 +121,16 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format | |||
| 120 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB | 121 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB |
| 121 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 | 122 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 |
| 122 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB | 123 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB |
| 124 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6 | ||
| 125 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X6_SRGB | ||
| 126 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10 | ||
| 127 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X10_SRGB | ||
| 128 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12 | ||
| 129 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_12X12_SRGB | ||
| 130 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6 | ||
| 131 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X6_SRGB | ||
| 132 | {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5 | ||
| 133 | {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_6X5_SRGB | ||
| 123 | 134 | ||
| 124 | // Depth formats | 135 | // Depth formats |
| 125 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F | 136 | {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst | |||
| 102 | RendererOpenGL::~RendererOpenGL() = default; | 102 | RendererOpenGL::~RendererOpenGL() = default; |
| 103 | 103 | ||
| 104 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 104 | void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| 105 | system.GetPerfStats().EndSystemFrame(); | ||
| 106 | |||
| 107 | // Maintain the rasterizer's state as a priority | 105 | // Maintain the rasterizer's state as a priority |
| 108 | OpenGLState prev_state = OpenGLState::GetCurState(); | 106 | OpenGLState prev_state = OpenGLState::GetCurState(); |
| 109 | state.AllDirty(); | 107 | state.AllDirty(); |
| @@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 135 | 133 | ||
| 136 | render_window.PollEvents(); | 134 | render_window.PollEvents(); |
| 137 | 135 | ||
| 138 | system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); | ||
| 139 | system.GetPerfStats().BeginSystemFrame(); | ||
| 140 | |||
| 141 | // Restore the rasterizer state | 136 | // Restore the rasterizer state |
| 142 | prev_state.AllDirty(); | 137 | prev_state.AllDirty(); |
| 143 | prev_state.Apply(); | 138 | prev_state.Apply(); |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8bcd04221..42cf068b6 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -1648,32 +1648,32 @@ class ExprDecompiler { | |||
| 1648 | public: | 1648 | public: |
| 1649 | explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} | 1649 | explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} |
| 1650 | 1650 | ||
| 1651 | Id operator()(VideoCommon::Shader::ExprAnd& expr) { | 1651 | Id operator()(const ExprAnd& expr) { |
| 1652 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | 1652 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); |
| 1653 | const Id op1 = Visit(expr.operand1); | 1653 | const Id op1 = Visit(expr.operand1); |
| 1654 | const Id op2 = Visit(expr.operand2); | 1654 | const Id op2 = Visit(expr.operand2); |
| 1655 | return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2)); | 1655 | return decomp.Emit(decomp.OpLogicalAnd(type_def, op1, op2)); |
| 1656 | } | 1656 | } |
| 1657 | 1657 | ||
| 1658 | Id operator()(VideoCommon::Shader::ExprOr& expr) { | 1658 | Id operator()(const ExprOr& expr) { |
| 1659 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | 1659 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); |
| 1660 | const Id op1 = Visit(expr.operand1); | 1660 | const Id op1 = Visit(expr.operand1); |
| 1661 | const Id op2 = Visit(expr.operand2); | 1661 | const Id op2 = Visit(expr.operand2); |
| 1662 | return decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2)); | 1662 | return decomp.Emit(decomp.OpLogicalOr(type_def, op1, op2)); |
| 1663 | } | 1663 | } |
| 1664 | 1664 | ||
| 1665 | Id operator()(VideoCommon::Shader::ExprNot& expr) { | 1665 | Id operator()(const ExprNot& expr) { |
| 1666 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); | 1666 | const Id type_def = decomp.GetTypeDefinition(Type::Bool); |
| 1667 | const Id op1 = Visit(expr.operand1); | 1667 | const Id op1 = Visit(expr.operand1); |
| 1668 | return decomp.Emit(decomp.OpLogicalNot(type_def, op1)); | 1668 | return decomp.Emit(decomp.OpLogicalNot(type_def, op1)); |
| 1669 | } | 1669 | } |
| 1670 | 1670 | ||
| 1671 | Id operator()(VideoCommon::Shader::ExprPredicate& expr) { | 1671 | Id operator()(const ExprPredicate& expr) { |
| 1672 | const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); | 1672 | const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate); |
| 1673 | return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred))); | 1673 | return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred))); |
| 1674 | } | 1674 | } |
| 1675 | 1675 | ||
| 1676 | Id operator()(VideoCommon::Shader::ExprCondCode& expr) { | 1676 | Id operator()(const ExprCondCode& expr) { |
| 1677 | const Node cc = decomp.ir.GetConditionCode(expr.cc); | 1677 | const Node cc = decomp.ir.GetConditionCode(expr.cc); |
| 1678 | Id target; | 1678 | Id target; |
| 1679 | 1679 | ||
| @@ -1682,10 +1682,13 @@ public: | |||
| 1682 | switch (index) { | 1682 | switch (index) { |
| 1683 | case Tegra::Shader::Pred::NeverExecute: | 1683 | case Tegra::Shader::Pred::NeverExecute: |
| 1684 | target = decomp.v_false; | 1684 | target = decomp.v_false; |
| 1685 | break; | ||
| 1685 | case Tegra::Shader::Pred::UnusedIndex: | 1686 | case Tegra::Shader::Pred::UnusedIndex: |
| 1686 | target = decomp.v_true; | 1687 | target = decomp.v_true; |
| 1688 | break; | ||
| 1687 | default: | 1689 | default: |
| 1688 | target = decomp.predicates.at(index); | 1690 | target = decomp.predicates.at(index); |
| 1691 | break; | ||
| 1689 | } | 1692 | } |
| 1690 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { | 1693 | } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { |
| 1691 | target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag())); | 1694 | target = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag())); |
| @@ -1693,15 +1696,22 @@ public: | |||
| 1693 | return decomp.Emit(decomp.OpLoad(decomp.t_bool, target)); | 1696 | return decomp.Emit(decomp.OpLoad(decomp.t_bool, target)); |
| 1694 | } | 1697 | } |
| 1695 | 1698 | ||
| 1696 | Id operator()(VideoCommon::Shader::ExprVar& expr) { | 1699 | Id operator()(const ExprVar& expr) { |
| 1697 | return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index))); | 1700 | return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index))); |
| 1698 | } | 1701 | } |
| 1699 | 1702 | ||
| 1700 | Id operator()(VideoCommon::Shader::ExprBoolean& expr) { | 1703 | Id operator()(const ExprBoolean& expr) { |
| 1701 | return expr.value ? decomp.v_true : decomp.v_false; | 1704 | return expr.value ? decomp.v_true : decomp.v_false; |
| 1702 | } | 1705 | } |
| 1703 | 1706 | ||
| 1704 | Id Visit(VideoCommon::Shader::Expr& node) { | 1707 | Id operator()(const ExprGprEqual& expr) { |
| 1708 | const Id target = decomp.Constant(decomp.t_uint, expr.value); | ||
| 1709 | const Id gpr = decomp.BitcastTo<Type::Uint>( | ||
| 1710 | decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)))); | ||
| 1711 | return decomp.Emit(decomp.OpLogicalEqual(decomp.t_uint, gpr, target)); | ||
| 1712 | } | ||
| 1713 | |||
| 1714 | Id Visit(const Expr& node) { | ||
| 1705 | return std::visit(*this, *node); | 1715 | return std::visit(*this, *node); |
| 1706 | } | 1716 | } |
| 1707 | 1717 | ||
| @@ -1713,7 +1723,7 @@ class ASTDecompiler { | |||
| 1713 | public: | 1723 | public: |
| 1714 | explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} | 1724 | explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {} |
| 1715 | 1725 | ||
| 1716 | void operator()(VideoCommon::Shader::ASTProgram& ast) { | 1726 | void operator()(const ASTProgram& ast) { |
| 1717 | ASTNode current = ast.nodes.GetFirst(); | 1727 | ASTNode current = ast.nodes.GetFirst(); |
| 1718 | while (current) { | 1728 | while (current) { |
| 1719 | Visit(current); | 1729 | Visit(current); |
| @@ -1721,7 +1731,7 @@ public: | |||
| 1721 | } | 1731 | } |
| 1722 | } | 1732 | } |
| 1723 | 1733 | ||
| 1724 | void operator()(VideoCommon::Shader::ASTIfThen& ast) { | 1734 | void operator()(const ASTIfThen& ast) { |
| 1725 | ExprDecompiler expr_parser{decomp}; | 1735 | ExprDecompiler expr_parser{decomp}; |
| 1726 | const Id condition = expr_parser.Visit(ast.condition); | 1736 | const Id condition = expr_parser.Visit(ast.condition); |
| 1727 | const Id then_label = decomp.OpLabel(); | 1737 | const Id then_label = decomp.OpLabel(); |
| @@ -1738,33 +1748,33 @@ public: | |||
| 1738 | decomp.Emit(endif_label); | 1748 | decomp.Emit(endif_label); |
| 1739 | } | 1749 | } |
| 1740 | 1750 | ||
| 1741 | void operator()(VideoCommon::Shader::ASTIfElse& ast) { | 1751 | void operator()([[maybe_unused]] const ASTIfElse& ast) { |
| 1742 | UNREACHABLE(); | 1752 | UNREACHABLE(); |
| 1743 | } | 1753 | } |
| 1744 | 1754 | ||
| 1745 | void operator()(VideoCommon::Shader::ASTBlockEncoded& ast) { | 1755 | void operator()([[maybe_unused]] const ASTBlockEncoded& ast) { |
| 1746 | UNREACHABLE(); | 1756 | UNREACHABLE(); |
| 1747 | } | 1757 | } |
| 1748 | 1758 | ||
| 1749 | void operator()(VideoCommon::Shader::ASTBlockDecoded& ast) { | 1759 | void operator()(const ASTBlockDecoded& ast) { |
| 1750 | decomp.VisitBasicBlock(ast.nodes); | 1760 | decomp.VisitBasicBlock(ast.nodes); |
| 1751 | } | 1761 | } |
| 1752 | 1762 | ||
| 1753 | void operator()(VideoCommon::Shader::ASTVarSet& ast) { | 1763 | void operator()(const ASTVarSet& ast) { |
| 1754 | ExprDecompiler expr_parser{decomp}; | 1764 | ExprDecompiler expr_parser{decomp}; |
| 1755 | const Id condition = expr_parser.Visit(ast.condition); | 1765 | const Id condition = expr_parser.Visit(ast.condition); |
| 1756 | decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition)); | 1766 | decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition)); |
| 1757 | } | 1767 | } |
| 1758 | 1768 | ||
| 1759 | void operator()(VideoCommon::Shader::ASTLabel& ast) { | 1769 | void operator()([[maybe_unused]] const ASTLabel& ast) { |
| 1760 | // Do nothing | 1770 | // Do nothing |
| 1761 | } | 1771 | } |
| 1762 | 1772 | ||
| 1763 | void operator()(VideoCommon::Shader::ASTGoto& ast) { | 1773 | void operator()([[maybe_unused]] const ASTGoto& ast) { |
| 1764 | UNREACHABLE(); | 1774 | UNREACHABLE(); |
| 1765 | } | 1775 | } |
| 1766 | 1776 | ||
| 1767 | void operator()(VideoCommon::Shader::ASTDoWhile& ast) { | 1777 | void operator()(const ASTDoWhile& ast) { |
| 1768 | const Id loop_label = decomp.OpLabel(); | 1778 | const Id loop_label = decomp.OpLabel(); |
| 1769 | const Id endloop_label = decomp.OpLabel(); | 1779 | const Id endloop_label = decomp.OpLabel(); |
| 1770 | const Id loop_start_block = decomp.OpLabel(); | 1780 | const Id loop_start_block = decomp.OpLabel(); |
| @@ -1787,7 +1797,7 @@ public: | |||
| 1787 | decomp.Emit(endloop_label); | 1797 | decomp.Emit(endloop_label); |
| 1788 | } | 1798 | } |
| 1789 | 1799 | ||
| 1790 | void operator()(VideoCommon::Shader::ASTReturn& ast) { | 1800 | void operator()(const ASTReturn& ast) { |
| 1791 | if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { | 1801 | if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { |
| 1792 | ExprDecompiler expr_parser{decomp}; | 1802 | ExprDecompiler expr_parser{decomp}; |
| 1793 | const Id condition = expr_parser.Visit(ast.condition); | 1803 | const Id condition = expr_parser.Visit(ast.condition); |
| @@ -1817,7 +1827,7 @@ public: | |||
| 1817 | } | 1827 | } |
| 1818 | } | 1828 | } |
| 1819 | 1829 | ||
| 1820 | void operator()(VideoCommon::Shader::ASTBreak& ast) { | 1830 | void operator()(const ASTBreak& ast) { |
| 1821 | if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { | 1831 | if (!VideoCommon::Shader::ExprIsTrue(ast.condition)) { |
| 1822 | ExprDecompiler expr_parser{decomp}; | 1832 | ExprDecompiler expr_parser{decomp}; |
| 1823 | const Id condition = expr_parser.Visit(ast.condition); | 1833 | const Id condition = expr_parser.Visit(ast.condition); |
| @@ -1837,7 +1847,7 @@ public: | |||
| 1837 | } | 1847 | } |
| 1838 | } | 1848 | } |
| 1839 | 1849 | ||
| 1840 | void Visit(VideoCommon::Shader::ASTNode& node) { | 1850 | void Visit(const ASTNode& node) { |
| 1841 | std::visit(*this, *node->GetInnerData()); | 1851 | std::visit(*this, *node->GetInnerData()); |
| 1842 | } | 1852 | } |
| 1843 | 1853 | ||
| @@ -1853,9 +1863,11 @@ void SPIRVDecompiler::DecompileAST() { | |||
| 1853 | Name(id, fmt::format("flow_var_{}", i)); | 1863 | Name(id, fmt::format("flow_var_{}", i)); |
| 1854 | flow_variables.emplace(i, AddGlobalVariable(id)); | 1864 | flow_variables.emplace(i, AddGlobalVariable(id)); |
| 1855 | } | 1865 | } |
| 1866 | |||
| 1867 | const ASTNode program = ir.GetASTProgram(); | ||
| 1856 | ASTDecompiler decompiler{*this}; | 1868 | ASTDecompiler decompiler{*this}; |
| 1857 | VideoCommon::Shader::ASTNode program = ir.GetASTProgram(); | ||
| 1858 | decompiler.Visit(program); | 1869 | decompiler.Visit(program); |
| 1870 | |||
| 1859 | const Id next_block = OpLabel(); | 1871 | const Id next_block = OpLabel(); |
| 1860 | Emit(OpBranch(next_block)); | 1872 | Emit(OpBranch(next_block)); |
| 1861 | Emit(next_block); | 1873 | Emit(next_block); |
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp index 436d45f4b..3f96d9076 100644 --- a/src/video_core/shader/ast.cpp +++ b/src/video_core/shader/ast.cpp | |||
| @@ -3,6 +3,9 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <string> | 5 | #include <string> |
| 6 | #include <string_view> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 6 | 9 | ||
| 7 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 8 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| @@ -225,11 +228,16 @@ public: | |||
| 225 | inner += expr.value ? "true" : "false"; | 228 | inner += expr.value ? "true" : "false"; |
| 226 | } | 229 | } |
| 227 | 230 | ||
| 231 | void operator()(const ExprGprEqual& expr) { | ||
| 232 | inner += "( gpr_" + std::to_string(expr.gpr) + " == " + std::to_string(expr.value) + ')'; | ||
| 233 | } | ||
| 234 | |||
| 228 | const std::string& GetResult() const { | 235 | const std::string& GetResult() const { |
| 229 | return inner; | 236 | return inner; |
| 230 | } | 237 | } |
| 231 | 238 | ||
| 232 | std::string inner{}; | 239 | private: |
| 240 | std::string inner; | ||
| 233 | }; | 241 | }; |
| 234 | 242 | ||
| 235 | class ASTPrinter { | 243 | class ASTPrinter { |
| @@ -249,7 +257,7 @@ public: | |||
| 249 | void operator()(const ASTIfThen& ast) { | 257 | void operator()(const ASTIfThen& ast) { |
| 250 | ExprPrinter expr_parser{}; | 258 | ExprPrinter expr_parser{}; |
| 251 | std::visit(expr_parser, *ast.condition); | 259 | std::visit(expr_parser, *ast.condition); |
| 252 | inner += Ident() + "if (" + expr_parser.GetResult() + ") {\n"; | 260 | inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult()); |
| 253 | scope++; | 261 | scope++; |
| 254 | ASTNode current = ast.nodes.GetFirst(); | 262 | ASTNode current = ast.nodes.GetFirst(); |
| 255 | while (current) { | 263 | while (current) { |
| @@ -257,11 +265,13 @@ public: | |||
| 257 | current = current->GetNext(); | 265 | current = current->GetNext(); |
| 258 | } | 266 | } |
| 259 | scope--; | 267 | scope--; |
| 260 | inner += Ident() + "}\n"; | 268 | inner += fmt::format("{}}}\n", Indent()); |
| 261 | } | 269 | } |
| 262 | 270 | ||
| 263 | void operator()(const ASTIfElse& ast) { | 271 | void operator()(const ASTIfElse& ast) { |
| 264 | inner += Ident() + "else {\n"; | 272 | inner += Indent(); |
| 273 | inner += "else {\n"; | ||
| 274 | |||
| 265 | scope++; | 275 | scope++; |
| 266 | ASTNode current = ast.nodes.GetFirst(); | 276 | ASTNode current = ast.nodes.GetFirst(); |
| 267 | while (current) { | 277 | while (current) { |
| @@ -269,40 +279,41 @@ public: | |||
| 269 | current = current->GetNext(); | 279 | current = current->GetNext(); |
| 270 | } | 280 | } |
| 271 | scope--; | 281 | scope--; |
| 272 | inner += Ident() + "}\n"; | 282 | |
| 283 | inner += Indent(); | ||
| 284 | inner += "}\n"; | ||
| 273 | } | 285 | } |
| 274 | 286 | ||
| 275 | void operator()(const ASTBlockEncoded& ast) { | 287 | void operator()(const ASTBlockEncoded& ast) { |
| 276 | inner += Ident() + "Block(" + std::to_string(ast.start) + ", " + std::to_string(ast.end) + | 288 | inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end); |
| 277 | ");\n"; | ||
| 278 | } | 289 | } |
| 279 | 290 | ||
| 280 | void operator()(const ASTBlockDecoded& ast) { | 291 | void operator()([[maybe_unused]] const ASTBlockDecoded& ast) { |
| 281 | inner += Ident() + "Block;\n"; | 292 | inner += Indent(); |
| 293 | inner += "Block;\n"; | ||
| 282 | } | 294 | } |
| 283 | 295 | ||
| 284 | void operator()(const ASTVarSet& ast) { | 296 | void operator()(const ASTVarSet& ast) { |
| 285 | ExprPrinter expr_parser{}; | 297 | ExprPrinter expr_parser{}; |
| 286 | std::visit(expr_parser, *ast.condition); | 298 | std::visit(expr_parser, *ast.condition); |
| 287 | inner += | 299 | inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult()); |
| 288 | Ident() + "V" + std::to_string(ast.index) + " := " + expr_parser.GetResult() + ";\n"; | ||
| 289 | } | 300 | } |
| 290 | 301 | ||
| 291 | void operator()(const ASTLabel& ast) { | 302 | void operator()(const ASTLabel& ast) { |
| 292 | inner += "Label_" + std::to_string(ast.index) + ":\n"; | 303 | inner += fmt::format("Label_{}:\n", ast.index); |
| 293 | } | 304 | } |
| 294 | 305 | ||
| 295 | void operator()(const ASTGoto& ast) { | 306 | void operator()(const ASTGoto& ast) { |
| 296 | ExprPrinter expr_parser{}; | 307 | ExprPrinter expr_parser{}; |
| 297 | std::visit(expr_parser, *ast.condition); | 308 | std::visit(expr_parser, *ast.condition); |
| 298 | inner += Ident() + "(" + expr_parser.GetResult() + ") -> goto Label_" + | 309 | inner += |
| 299 | std::to_string(ast.label) + ";\n"; | 310 | fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label); |
| 300 | } | 311 | } |
| 301 | 312 | ||
| 302 | void operator()(const ASTDoWhile& ast) { | 313 | void operator()(const ASTDoWhile& ast) { |
| 303 | ExprPrinter expr_parser{}; | 314 | ExprPrinter expr_parser{}; |
| 304 | std::visit(expr_parser, *ast.condition); | 315 | std::visit(expr_parser, *ast.condition); |
| 305 | inner += Ident() + "do {\n"; | 316 | inner += fmt::format("{}do {{\n", Indent()); |
| 306 | scope++; | 317 | scope++; |
| 307 | ASTNode current = ast.nodes.GetFirst(); | 318 | ASTNode current = ast.nodes.GetFirst(); |
| 308 | while (current) { | 319 | while (current) { |
| @@ -310,32 +321,23 @@ public: | |||
| 310 | current = current->GetNext(); | 321 | current = current->GetNext(); |
| 311 | } | 322 | } |
| 312 | scope--; | 323 | scope--; |
| 313 | inner += Ident() + "} while (" + expr_parser.GetResult() + ");\n"; | 324 | inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult()); |
| 314 | } | 325 | } |
| 315 | 326 | ||
| 316 | void operator()(const ASTReturn& ast) { | 327 | void operator()(const ASTReturn& ast) { |
| 317 | ExprPrinter expr_parser{}; | 328 | ExprPrinter expr_parser{}; |
| 318 | std::visit(expr_parser, *ast.condition); | 329 | std::visit(expr_parser, *ast.condition); |
| 319 | inner += Ident() + "(" + expr_parser.GetResult() + ") -> " + | 330 | inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(), |
| 320 | (ast.kills ? "discard" : "exit") + ";\n"; | 331 | ast.kills ? "discard" : "exit"); |
| 321 | } | 332 | } |
| 322 | 333 | ||
| 323 | void operator()(const ASTBreak& ast) { | 334 | void operator()(const ASTBreak& ast) { |
| 324 | ExprPrinter expr_parser{}; | 335 | ExprPrinter expr_parser{}; |
| 325 | std::visit(expr_parser, *ast.condition); | 336 | std::visit(expr_parser, *ast.condition); |
| 326 | inner += Ident() + "(" + expr_parser.GetResult() + ") -> break;\n"; | 337 | inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult()); |
| 327 | } | 338 | } |
| 328 | 339 | ||
| 329 | std::string& Ident() { | 340 | void Visit(const ASTNode& node) { |
| 330 | if (memo_scope == scope) { | ||
| 331 | return tabs_memo; | ||
| 332 | } | ||
| 333 | tabs_memo = tabs.substr(0, scope * 2); | ||
| 334 | memo_scope = scope; | ||
| 335 | return tabs_memo; | ||
| 336 | } | ||
| 337 | |||
| 338 | void Visit(ASTNode& node) { | ||
| 339 | std::visit(*this, *node->GetInnerData()); | 341 | std::visit(*this, *node->GetInnerData()); |
| 340 | } | 342 | } |
| 341 | 343 | ||
| @@ -344,16 +346,29 @@ public: | |||
| 344 | } | 346 | } |
| 345 | 347 | ||
| 346 | private: | 348 | private: |
| 349 | std::string_view Indent() { | ||
| 350 | if (space_segment_scope == scope) { | ||
| 351 | return space_segment; | ||
| 352 | } | ||
| 353 | |||
| 354 | // Ensure that we don't exceed our view. | ||
| 355 | ASSERT(scope * 2 < spaces.size()); | ||
| 356 | |||
| 357 | space_segment = spaces.substr(0, scope * 2); | ||
| 358 | space_segment_scope = scope; | ||
| 359 | return space_segment; | ||
| 360 | } | ||
| 361 | |||
| 347 | std::string inner{}; | 362 | std::string inner{}; |
| 348 | u32 scope{}; | 363 | std::string_view space_segment; |
| 349 | 364 | ||
| 350 | std::string tabs_memo{}; | 365 | u32 scope{}; |
| 351 | u32 memo_scope{}; | 366 | u32 space_segment_scope{}; |
| 352 | 367 | ||
| 353 | static constexpr std::string_view tabs{" "}; | 368 | static constexpr std::string_view spaces{" "}; |
| 354 | }; | 369 | }; |
| 355 | 370 | ||
| 356 | std::string ASTManager::Print() { | 371 | std::string ASTManager::Print() const { |
| 357 | ASTPrinter printer{}; | 372 | ASTPrinter printer{}; |
| 358 | printer.Visit(main_node); | 373 | printer.Visit(main_node); |
| 359 | return printer.GetResult(); | 374 | return printer.GetResult(); |
| @@ -549,13 +564,13 @@ bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) co | |||
| 549 | return min->GetParent() == max->GetParent(); | 564 | return min->GetParent() == max->GetParent(); |
| 550 | } | 565 | } |
| 551 | 566 | ||
| 552 | void ASTManager::ShowCurrentState(std::string_view state) { | 567 | void ASTManager::ShowCurrentState(std::string_view state) const { |
| 553 | LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); | 568 | LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); |
| 554 | SanityCheck(); | 569 | SanityCheck(); |
| 555 | } | 570 | } |
| 556 | 571 | ||
| 557 | void ASTManager::SanityCheck() { | 572 | void ASTManager::SanityCheck() const { |
| 558 | for (auto& label : labels) { | 573 | for (const auto& label : labels) { |
| 559 | if (!label->GetParent()) { | 574 | if (!label->GetParent()) { |
| 560 | LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); | 575 | LOG_CRITICAL(HW_GPU, "Sanity Check Failed"); |
| 561 | } | 576 | } |
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index d7bf11821..a2f0044ba 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h | |||
| @@ -328,13 +328,13 @@ public: | |||
| 328 | 328 | ||
| 329 | void InsertReturn(Expr condition, bool kills); | 329 | void InsertReturn(Expr condition, bool kills); |
| 330 | 330 | ||
| 331 | std::string Print(); | 331 | std::string Print() const; |
| 332 | 332 | ||
| 333 | void Decompile(); | 333 | void Decompile(); |
| 334 | 334 | ||
| 335 | void ShowCurrentState(std::string_view state); | 335 | void ShowCurrentState(std::string_view state) const; |
| 336 | 336 | ||
| 337 | void SanityCheck(); | 337 | void SanityCheck() const; |
| 338 | 338 | ||
| 339 | void Clear(); | 339 | void Clear(); |
| 340 | 340 | ||
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp new file mode 100644 index 000000000..fe467608e --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <memory> | ||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/shader/const_buffer_locker.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Engines::SamplerDescriptor; | ||
| 17 | |||
| 18 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) | ||
| 19 | : stage{shader_stage} {} | ||
| 20 | |||
| 21 | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 22 | Tegra::Engines::ConstBufferEngineInterface& engine) | ||
| 23 | : stage{shader_stage}, engine{&engine} {} | ||
| 24 | |||
| 25 | ConstBufferLocker::~ConstBufferLocker() = default; | ||
| 26 | |||
| 27 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { | ||
| 28 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 29 | const auto iter = keys.find(key); | ||
| 30 | if (iter != keys.end()) { | ||
| 31 | return iter->second; | ||
| 32 | } | ||
| 33 | if (!engine) { | ||
| 34 | return std::nullopt; | ||
| 35 | } | ||
| 36 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 37 | keys.emplace(key, value); | ||
| 38 | return value; | ||
| 39 | } | ||
| 40 | |||
| 41 | std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { | ||
| 42 | const u32 key = offset; | ||
| 43 | const auto iter = bound_samplers.find(key); | ||
| 44 | if (iter != bound_samplers.end()) { | ||
| 45 | return iter->second; | ||
| 46 | } | ||
| 47 | if (!engine) { | ||
| 48 | return std::nullopt; | ||
| 49 | } | ||
| 50 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 51 | bound_samplers.emplace(key, value); | ||
| 52 | return value; | ||
| 53 | } | ||
| 54 | |||
| 55 | std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( | ||
| 56 | u32 buffer, u32 offset) { | ||
| 57 | const std::pair key = {buffer, offset}; | ||
| 58 | const auto iter = bindless_samplers.find(key); | ||
| 59 | if (iter != bindless_samplers.end()) { | ||
| 60 | return iter->second; | ||
| 61 | } | ||
| 62 | if (!engine) { | ||
| 63 | return std::nullopt; | ||
| 64 | } | ||
| 65 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 66 | bindless_samplers.emplace(key, value); | ||
| 67 | return value; | ||
| 68 | } | ||
| 69 | |||
| 70 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 71 | keys.insert_or_assign({buffer, offset}, value); | ||
| 72 | } | ||
| 73 | |||
| 74 | void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 75 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 76 | } | ||
| 77 | |||
| 78 | void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 79 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 80 | } | ||
| 81 | |||
| 82 | bool ConstBufferLocker::IsConsistent() const { | ||
| 83 | if (!engine) { | ||
| 84 | return false; | ||
| 85 | } | ||
| 86 | return std::all_of(keys.begin(), keys.end(), | ||
| 87 | [this](const auto& pair) { | ||
| 88 | const auto [cbuf, offset] = pair.first; | ||
| 89 | const auto value = pair.second; | ||
| 90 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 91 | }) && | ||
| 92 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 93 | [this](const auto& sampler) { | ||
| 94 | const auto [key, value] = sampler; | ||
| 95 | return value == engine->AccessBoundSampler(stage, key); | ||
| 96 | }) && | ||
| 97 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 98 | [this](const auto& sampler) { | ||
| 99 | const auto [cbuf, offset] = sampler.first; | ||
| 100 | const auto value = sampler.second; | ||
| 101 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 102 | }); | ||
| 103 | } | ||
| 104 | |||
| 105 | bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { | ||
| 106 | return keys == rhs.keys && bound_samplers == rhs.bound_samplers && | ||
| 107 | bindless_samplers == rhs.bindless_samplers; | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h new file mode 100644 index 000000000..600e2f3c3 --- /dev/null +++ b/src/video_core/shader/const_buffer_locker.h | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/hash.h" | ||
| 10 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 15 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 16 | using BindlessSamplerMap = | ||
| 17 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 18 | |||
| 19 | /** | ||
| 20 | * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader | ||
| 21 | * compiler. With it, the shader can obtain required data from GPU state and store it for disk | ||
| 22 | * shader compilation. | ||
| 23 | **/ | ||
| 24 | class ConstBufferLocker { | ||
| 25 | public: | ||
| 26 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | ||
| 27 | |||
| 28 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||
| 29 | Tegra::Engines::ConstBufferEngineInterface& engine); | ||
| 30 | |||
| 31 | ~ConstBufferLocker(); | ||
| 32 | |||
| 33 | /// Retrieves a key from the locker. If it's registered, it returns the registered value; | ||
| 34 | /// otherwise it is read from the engine and registered (std::nullopt if there is no engine). | ||
| 35 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 36 | |||
| 37 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 38 | |||
| 39 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 40 | |||
| 41 | /// Inserts a key. | ||
| 42 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 43 | |||
| 44 | /// Inserts a bound sampler key. | ||
| 45 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 46 | |||
| 47 | /// Inserts a bindless sampler key. | ||
| 48 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 49 | |||
| 50 | /// Checks keys and samplers against the engine's current const buffers. Returns true if they | ||
| 51 | /// are the same value, false otherwise. | ||
| 52 | bool IsConsistent() const; | ||
| 53 | |||
| 54 | /// Returns true if the keys are equal to the other ones in the locker. | ||
| 55 | bool HasEqualKeys(const ConstBufferLocker& rhs) const; | ||
| 56 | |||
| 57 | /// Gives a getter to the const buffer keys in the database. | ||
| 58 | const KeyMap& GetKeys() const { | ||
| 59 | return keys; | ||
| 60 | } | ||
| 61 | |||
| 62 | /// Gets samplers database. | ||
| 63 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 64 | return bound_samplers; | ||
| 65 | } | ||
| 66 | |||
| 67 | /// Gets bindless samplers database. | ||
| 68 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 69 | return bindless_samplers; | ||
| 70 | } | ||
| 71 | |||
| 72 | private: | ||
| 73 | const Tegra::Engines::ShaderType stage; | ||
| 74 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 75 | KeyMap keys; | ||
| 76 | BoundSamplerMap bound_samplers; | ||
| 77 | BindlessSamplerMap bindless_samplers; | ||
| 78 | }; | ||
| 79 | |||
| 80 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 268d1aed0..d47c63d9f 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp | |||
| @@ -35,14 +35,20 @@ struct BlockStack { | |||
| 35 | std::stack<u32> pbk_stack{}; | 35 | std::stack<u32> pbk_stack{}; |
| 36 | }; | 36 | }; |
| 37 | 37 | ||
| 38 | struct BlockBranchInfo { | 38 | template <typename T, typename... Args> |
| 39 | Condition condition{}; | 39 | BlockBranchInfo MakeBranchInfo(Args&&... args) { |
| 40 | s32 address{exit_branch}; | 40 | static_assert(std::is_convertible_v<T, BranchData>); |
| 41 | bool kill{}; | 41 | return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); |
| 42 | bool is_sync{}; | 42 | } |
| 43 | bool is_brk{}; | 43 | |
| 44 | bool ignore{}; | 44 | bool BlockBranchIsIgnored(BlockBranchInfo first) { |
| 45 | }; | 45 | bool ignore = false; |
| 46 | if (std::holds_alternative<SingleBranch>(*first)) { | ||
| 47 | const auto branch = std::get_if<SingleBranch>(first.get()); | ||
| 48 | ignore = branch->ignore; | ||
| 49 | } | ||
| 50 | return ignore; | ||
| 51 | } | ||
| 46 | 52 | ||
| 47 | struct BlockInfo { | 53 | struct BlockInfo { |
| 48 | u32 start{}; | 54 | u32 start{}; |
| @@ -56,10 +62,11 @@ struct BlockInfo { | |||
| 56 | }; | 62 | }; |
| 57 | 63 | ||
| 58 | struct CFGRebuildState { | 64 | struct CFGRebuildState { |
| 59 | explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, | 65 | explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) |
| 60 | const u32 start) | 66 | : program_code{program_code}, start{start}, locker{locker} {} |
| 61 | : start{start}, program_code{program_code}, program_size{program_size} {} | ||
| 62 | 67 | ||
| 68 | const ProgramCode& program_code; | ||
| 69 | ConstBufferLocker& locker; | ||
| 63 | u32 start{}; | 70 | u32 start{}; |
| 64 | std::vector<BlockInfo> block_info{}; | 71 | std::vector<BlockInfo> block_info{}; |
| 65 | std::list<u32> inspect_queries{}; | 72 | std::list<u32> inspect_queries{}; |
| @@ -69,8 +76,6 @@ struct CFGRebuildState { | |||
| 69 | std::map<u32, u32> ssy_labels{}; | 76 | std::map<u32, u32> ssy_labels{}; |
| 70 | std::map<u32, u32> pbk_labels{}; | 77 | std::map<u32, u32> pbk_labels{}; |
| 71 | std::unordered_map<u32, BlockStack> stacks{}; | 78 | std::unordered_map<u32, BlockStack> stacks{}; |
| 72 | const ProgramCode& program_code; | ||
| 73 | const std::size_t program_size; | ||
| 74 | ASTManager* manager; | 79 | ASTManager* manager; |
| 75 | }; | 80 | }; |
| 76 | 81 | ||
| @@ -124,10 +129,116 @@ enum class ParseResult : u32 { | |||
| 124 | AbnormalFlow, | 129 | AbnormalFlow, |
| 125 | }; | 130 | }; |
| 126 | 131 | ||
| 132 | struct BranchIndirectInfo { | ||
| 133 | u32 buffer{}; | ||
| 134 | u32 offset{}; | ||
| 135 | u32 entries{}; | ||
| 136 | s32 relative_position{}; | ||
| 137 | }; | ||
| 138 | |||
| 139 | std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, | ||
| 140 | u32 start_address, u32 current_position) { | ||
| 141 | const u32 shader_start = state.start; | ||
| 142 | u32 pos = current_position; | ||
| 143 | BranchIndirectInfo result{}; | ||
| 144 | u64 track_register = 0; | ||
| 145 | |||
| 146 | // Step 0 Get BRX Info | ||
| 147 | const Instruction instr = {state.program_code[pos]}; | ||
| 148 | const auto opcode = OpCode::Decode(instr); | ||
| 149 | if (opcode->get().GetId() != OpCode::Id::BRX) { | ||
| 150 | return std::nullopt; | ||
| 151 | } | ||
| 152 | if (instr.brx.constant_buffer != 0) { | ||
| 153 | return std::nullopt; | ||
| 154 | } | ||
| 155 | track_register = instr.gpr8.Value(); | ||
| 156 | result.relative_position = instr.brx.GetBranchExtend(); | ||
| 157 | pos--; | ||
| 158 | bool found_track = false; | ||
| 159 | |||
| 160 | // Step 1 Track LDC | ||
| 161 | while (pos >= shader_start) { | ||
| 162 | if (IsSchedInstruction(pos, shader_start)) { | ||
| 163 | pos--; | ||
| 164 | continue; | ||
| 165 | } | ||
| 166 | const Instruction instr = {state.program_code[pos]}; | ||
| 167 | const auto opcode = OpCode::Decode(instr); | ||
| 168 | if (opcode->get().GetId() == OpCode::Id::LD_C) { | ||
| 169 | if (instr.gpr0.Value() == track_register && | ||
| 170 | instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { | ||
| 171 | result.buffer = instr.cbuf36.index.Value(); | ||
| 172 | result.offset = static_cast<u32>(instr.cbuf36.GetOffset()); | ||
| 173 | track_register = instr.gpr8.Value(); | ||
| 174 | pos--; | ||
| 175 | found_track = true; | ||
| 176 | break; | ||
| 177 | } | ||
| 178 | } | ||
| 179 | pos--; | ||
| 180 | } | ||
| 181 | |||
| 182 | if (!found_track) { | ||
| 183 | return std::nullopt; | ||
| 184 | } | ||
| 185 | found_track = false; | ||
| 186 | |||
| 187 | // Step 2 Track SHL | ||
| 188 | while (pos >= shader_start) { | ||
| 189 | if (IsSchedInstruction(pos, shader_start)) { | ||
| 190 | pos--; | ||
| 191 | continue; | ||
| 192 | } | ||
| 193 | const Instruction instr = state.program_code[pos]; | ||
| 194 | const auto opcode = OpCode::Decode(instr); | ||
| 195 | if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { | ||
| 196 | if (instr.gpr0.Value() == track_register) { | ||
| 197 | track_register = instr.gpr8.Value(); | ||
| 198 | pos--; | ||
| 199 | found_track = true; | ||
| 200 | break; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | pos--; | ||
| 204 | } | ||
| 205 | |||
| 206 | if (!found_track) { | ||
| 207 | return std::nullopt; | ||
| 208 | } | ||
| 209 | found_track = false; | ||
| 210 | |||
| 211 | // Step 3 Track IMNMX | ||
| 212 | while (pos >= shader_start) { | ||
| 213 | if (IsSchedInstruction(pos, shader_start)) { | ||
| 214 | pos--; | ||
| 215 | continue; | ||
| 216 | } | ||
| 217 | const Instruction instr = state.program_code[pos]; | ||
| 218 | const auto opcode = OpCode::Decode(instr); | ||
| 219 | if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { | ||
| 220 | if (instr.gpr0.Value() == track_register) { | ||
| 221 | track_register = instr.gpr8.Value(); | ||
| 222 | result.entries = instr.alu.GetSignedImm20_20() + 1; | ||
| 223 | pos--; | ||
| 224 | found_track = true; | ||
| 225 | break; | ||
| 226 | } | ||
| 227 | } | ||
| 228 | pos--; | ||
| 229 | } | ||
| 230 | |||
| 231 | if (!found_track) { | ||
| 232 | return std::nullopt; | ||
| 233 | } | ||
| 234 | return result; | ||
| 235 | } | ||
| 236 | |||
| 127 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | 237 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { |
| 128 | u32 offset = static_cast<u32>(address); | 238 | u32 offset = static_cast<u32>(address); |
| 129 | const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); | 239 | const u32 end_address = static_cast<u32>(state.program_code.size()); |
| 130 | ParseInfo parse_info{}; | 240 | ParseInfo parse_info{}; |
| 241 | SingleBranch single_branch{}; | ||
| 131 | 242 | ||
| 132 | const auto insert_label = [](CFGRebuildState& state, u32 address) { | 243 | const auto insert_label = [](CFGRebuildState& state, u32 address) { |
| 133 | const auto pair = state.labels.emplace(address); | 244 | const auto pair = state.labels.emplace(address); |
| @@ -140,13 +251,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 140 | if (offset >= end_address) { | 251 | if (offset >= end_address) { |
| 141 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break | 252 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break |
| 142 | ASSERT_MSG(false, "Shader passed the current limit!"); | 253 | ASSERT_MSG(false, "Shader passed the current limit!"); |
| 143 | parse_info.branch_info.address = exit_branch; | 254 | |
| 144 | parse_info.branch_info.ignore = false; | 255 | single_branch.address = exit_branch; |
| 256 | single_branch.ignore = false; | ||
| 145 | break; | 257 | break; |
| 146 | } | 258 | } |
| 147 | if (state.registered.count(offset) != 0) { | 259 | if (state.registered.count(offset) != 0) { |
| 148 | parse_info.branch_info.address = offset; | 260 | single_branch.address = offset; |
| 149 | parse_info.branch_info.ignore = true; | 261 | single_branch.ignore = true; |
| 150 | break; | 262 | break; |
| 151 | } | 263 | } |
| 152 | if (IsSchedInstruction(offset, state.start)) { | 264 | if (IsSchedInstruction(offset, state.start)) { |
| @@ -163,24 +275,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 163 | switch (opcode->get().GetId()) { | 275 | switch (opcode->get().GetId()) { |
| 164 | case OpCode::Id::EXIT: { | 276 | case OpCode::Id::EXIT: { |
| 165 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | 277 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); |
| 166 | parse_info.branch_info.condition.predicate = | 278 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |
| 167 | GetPredicate(pred_index, instr.negate_pred != 0); | 279 | if (single_branch.condition.predicate == Pred::NeverExecute) { |
| 168 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 169 | offset++; | 280 | offset++; |
| 170 | continue; | 281 | continue; |
| 171 | } | 282 | } |
| 172 | const ConditionCode cc = instr.flow_condition_code; | 283 | const ConditionCode cc = instr.flow_condition_code; |
| 173 | parse_info.branch_info.condition.cc = cc; | 284 | single_branch.condition.cc = cc; |
| 174 | if (cc == ConditionCode::F) { | 285 | if (cc == ConditionCode::F) { |
| 175 | offset++; | 286 | offset++; |
| 176 | continue; | 287 | continue; |
| 177 | } | 288 | } |
| 178 | parse_info.branch_info.address = exit_branch; | 289 | single_branch.address = exit_branch; |
| 179 | parse_info.branch_info.kill = false; | 290 | single_branch.kill = false; |
| 180 | parse_info.branch_info.is_sync = false; | 291 | single_branch.is_sync = false; |
| 181 | parse_info.branch_info.is_brk = false; | 292 | single_branch.is_brk = false; |
| 182 | parse_info.branch_info.ignore = false; | 293 | single_branch.ignore = false; |
| 183 | parse_info.end_address = offset; | 294 | parse_info.end_address = offset; |
| 295 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 296 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 297 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 184 | 298 | ||
| 185 | return {ParseResult::ControlCaught, parse_info}; | 299 | return {ParseResult::ControlCaught, parse_info}; |
| 186 | } | 300 | } |
| @@ -189,99 +303,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 189 | return {ParseResult::AbnormalFlow, parse_info}; | 303 | return {ParseResult::AbnormalFlow, parse_info}; |
| 190 | } | 304 | } |
| 191 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | 305 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); |
| 192 | parse_info.branch_info.condition.predicate = | 306 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |
| 193 | GetPredicate(pred_index, instr.negate_pred != 0); | 307 | if (single_branch.condition.predicate == Pred::NeverExecute) { |
| 194 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 195 | offset++; | 308 | offset++; |
| 196 | continue; | 309 | continue; |
| 197 | } | 310 | } |
| 198 | const ConditionCode cc = instr.flow_condition_code; | 311 | const ConditionCode cc = instr.flow_condition_code; |
| 199 | parse_info.branch_info.condition.cc = cc; | 312 | single_branch.condition.cc = cc; |
| 200 | if (cc == ConditionCode::F) { | 313 | if (cc == ConditionCode::F) { |
| 201 | offset++; | 314 | offset++; |
| 202 | continue; | 315 | continue; |
| 203 | } | 316 | } |
| 204 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | 317 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); |
| 205 | if (branch_offset == 0) { | 318 | if (branch_offset == 0) { |
| 206 | parse_info.branch_info.address = exit_branch; | 319 | single_branch.address = exit_branch; |
| 207 | } else { | 320 | } else { |
| 208 | parse_info.branch_info.address = branch_offset; | 321 | single_branch.address = branch_offset; |
| 209 | } | 322 | } |
| 210 | insert_label(state, branch_offset); | 323 | insert_label(state, branch_offset); |
| 211 | parse_info.branch_info.kill = false; | 324 | single_branch.kill = false; |
| 212 | parse_info.branch_info.is_sync = false; | 325 | single_branch.is_sync = false; |
| 213 | parse_info.branch_info.is_brk = false; | 326 | single_branch.is_brk = false; |
| 214 | parse_info.branch_info.ignore = false; | 327 | single_branch.ignore = false; |
| 215 | parse_info.end_address = offset; | 328 | parse_info.end_address = offset; |
| 329 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 330 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 331 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 216 | 332 | ||
| 217 | return {ParseResult::ControlCaught, parse_info}; | 333 | return {ParseResult::ControlCaught, parse_info}; |
| 218 | } | 334 | } |
| 219 | case OpCode::Id::SYNC: { | 335 | case OpCode::Id::SYNC: { |
| 220 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | 336 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); |
| 221 | parse_info.branch_info.condition.predicate = | 337 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |
| 222 | GetPredicate(pred_index, instr.negate_pred != 0); | 338 | if (single_branch.condition.predicate == Pred::NeverExecute) { |
| 223 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 224 | offset++; | 339 | offset++; |
| 225 | continue; | 340 | continue; |
| 226 | } | 341 | } |
| 227 | const ConditionCode cc = instr.flow_condition_code; | 342 | const ConditionCode cc = instr.flow_condition_code; |
| 228 | parse_info.branch_info.condition.cc = cc; | 343 | single_branch.condition.cc = cc; |
| 229 | if (cc == ConditionCode::F) { | 344 | if (cc == ConditionCode::F) { |
| 230 | offset++; | 345 | offset++; |
| 231 | continue; | 346 | continue; |
| 232 | } | 347 | } |
| 233 | parse_info.branch_info.address = unassigned_branch; | 348 | single_branch.address = unassigned_branch; |
| 234 | parse_info.branch_info.kill = false; | 349 | single_branch.kill = false; |
| 235 | parse_info.branch_info.is_sync = true; | 350 | single_branch.is_sync = true; |
| 236 | parse_info.branch_info.is_brk = false; | 351 | single_branch.is_brk = false; |
| 237 | parse_info.branch_info.ignore = false; | 352 | single_branch.ignore = false; |
| 238 | parse_info.end_address = offset; | 353 | parse_info.end_address = offset; |
| 354 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 355 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 356 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 239 | 357 | ||
| 240 | return {ParseResult::ControlCaught, parse_info}; | 358 | return {ParseResult::ControlCaught, parse_info}; |
| 241 | } | 359 | } |
| 242 | case OpCode::Id::BRK: { | 360 | case OpCode::Id::BRK: { |
| 243 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | 361 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); |
| 244 | parse_info.branch_info.condition.predicate = | 362 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |
| 245 | GetPredicate(pred_index, instr.negate_pred != 0); | 363 | if (single_branch.condition.predicate == Pred::NeverExecute) { |
| 246 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 247 | offset++; | 364 | offset++; |
| 248 | continue; | 365 | continue; |
| 249 | } | 366 | } |
| 250 | const ConditionCode cc = instr.flow_condition_code; | 367 | const ConditionCode cc = instr.flow_condition_code; |
| 251 | parse_info.branch_info.condition.cc = cc; | 368 | single_branch.condition.cc = cc; |
| 252 | if (cc == ConditionCode::F) { | 369 | if (cc == ConditionCode::F) { |
| 253 | offset++; | 370 | offset++; |
| 254 | continue; | 371 | continue; |
| 255 | } | 372 | } |
| 256 | parse_info.branch_info.address = unassigned_branch; | 373 | single_branch.address = unassigned_branch; |
| 257 | parse_info.branch_info.kill = false; | 374 | single_branch.kill = false; |
| 258 | parse_info.branch_info.is_sync = false; | 375 | single_branch.is_sync = false; |
| 259 | parse_info.branch_info.is_brk = true; | 376 | single_branch.is_brk = true; |
| 260 | parse_info.branch_info.ignore = false; | 377 | single_branch.ignore = false; |
| 261 | parse_info.end_address = offset; | 378 | parse_info.end_address = offset; |
| 379 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 380 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 381 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 262 | 382 | ||
| 263 | return {ParseResult::ControlCaught, parse_info}; | 383 | return {ParseResult::ControlCaught, parse_info}; |
| 264 | } | 384 | } |
| 265 | case OpCode::Id::KIL: { | 385 | case OpCode::Id::KIL: { |
| 266 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | 386 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); |
| 267 | parse_info.branch_info.condition.predicate = | 387 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); |
| 268 | GetPredicate(pred_index, instr.negate_pred != 0); | 388 | if (single_branch.condition.predicate == Pred::NeverExecute) { |
| 269 | if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { | ||
| 270 | offset++; | 389 | offset++; |
| 271 | continue; | 390 | continue; |
| 272 | } | 391 | } |
| 273 | const ConditionCode cc = instr.flow_condition_code; | 392 | const ConditionCode cc = instr.flow_condition_code; |
| 274 | parse_info.branch_info.condition.cc = cc; | 393 | single_branch.condition.cc = cc; |
| 275 | if (cc == ConditionCode::F) { | 394 | if (cc == ConditionCode::F) { |
| 276 | offset++; | 395 | offset++; |
| 277 | continue; | 396 | continue; |
| 278 | } | 397 | } |
| 279 | parse_info.branch_info.address = exit_branch; | 398 | single_branch.address = exit_branch; |
| 280 | parse_info.branch_info.kill = true; | 399 | single_branch.kill = true; |
| 281 | parse_info.branch_info.is_sync = false; | 400 | single_branch.is_sync = false; |
| 282 | parse_info.branch_info.is_brk = false; | 401 | single_branch.is_brk = false; |
| 283 | parse_info.branch_info.ignore = false; | 402 | single_branch.ignore = false; |
| 284 | parse_info.end_address = offset; | 403 | parse_info.end_address = offset; |
| 404 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 405 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 406 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 285 | 407 | ||
| 286 | return {ParseResult::ControlCaught, parse_info}; | 408 | return {ParseResult::ControlCaught, parse_info}; |
| 287 | } | 409 | } |
| @@ -298,6 +420,29 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 298 | break; | 420 | break; |
| 299 | } | 421 | } |
| 300 | case OpCode::Id::BRX: { | 422 | case OpCode::Id::BRX: { |
| 423 | auto tmp = TrackBranchIndirectInfo(state, address, offset); | ||
| 424 | if (tmp) { | ||
| 425 | auto result = *tmp; | ||
| 426 | std::vector<CaseBranch> branches{}; | ||
| 427 | s32 pc_target = offset + result.relative_position; | ||
| 428 | for (u32 i = 0; i < result.entries; i++) { | ||
| 429 | auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4); | ||
| 430 | if (!k) { | ||
| 431 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 432 | } | ||
| 433 | u32 value = *k; | ||
| 434 | u32 target = static_cast<u32>((value >> 3) + pc_target); | ||
| 435 | insert_label(state, target); | ||
| 436 | branches.emplace_back(value, target); | ||
| 437 | } | ||
| 438 | parse_info.end_address = offset; | ||
| 439 | parse_info.branch_info = MakeBranchInfo<MultiBranch>( | ||
| 440 | static_cast<u32>(instr.gpr8.Value()), std::move(branches)); | ||
| 441 | |||
| 442 | return {ParseResult::ControlCaught, parse_info}; | ||
| 443 | } else { | ||
| 444 | LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); | ||
| 445 | } | ||
| 301 | return {ParseResult::AbnormalFlow, parse_info}; | 446 | return {ParseResult::AbnormalFlow, parse_info}; |
| 302 | } | 447 | } |
| 303 | default: | 448 | default: |
| @@ -306,10 +451,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||
| 306 | 451 | ||
| 307 | offset++; | 452 | offset++; |
| 308 | } | 453 | } |
| 309 | parse_info.branch_info.kill = false; | 454 | single_branch.kill = false; |
| 310 | parse_info.branch_info.is_sync = false; | 455 | single_branch.is_sync = false; |
| 311 | parse_info.branch_info.is_brk = false; | 456 | single_branch.is_brk = false; |
| 312 | parse_info.end_address = offset - 1; | 457 | parse_info.end_address = offset - 1; |
| 458 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 459 | single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, | ||
| 460 | single_branch.is_brk, single_branch.ignore); | ||
| 313 | return {ParseResult::BlockEnd, parse_info}; | 461 | return {ParseResult::BlockEnd, parse_info}; |
| 314 | } | 462 | } |
| 315 | 463 | ||
| @@ -333,9 +481,10 @@ bool TryInspectAddress(CFGRebuildState& state) { | |||
| 333 | BlockInfo& current_block = state.block_info[block_index]; | 481 | BlockInfo& current_block = state.block_info[block_index]; |
| 334 | current_block.end = address - 1; | 482 | current_block.end = address - 1; |
| 335 | new_block.branch = current_block.branch; | 483 | new_block.branch = current_block.branch; |
| 336 | BlockBranchInfo forward_branch{}; | 484 | BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); |
| 337 | forward_branch.address = address; | 485 | const auto branch = std::get_if<SingleBranch>(forward_branch.get()); |
| 338 | forward_branch.ignore = true; | 486 | branch->address = address; |
| 487 | branch->ignore = true; | ||
| 339 | current_block.branch = forward_branch; | 488 | current_block.branch = forward_branch; |
| 340 | return true; | 489 | return true; |
| 341 | } | 490 | } |
| @@ -350,12 +499,15 @@ bool TryInspectAddress(CFGRebuildState& state) { | |||
| 350 | 499 | ||
| 351 | BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | 500 | BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); |
| 352 | block_info.branch = parse_info.branch_info; | 501 | block_info.branch = parse_info.branch_info; |
| 353 | if (parse_info.branch_info.condition.IsUnconditional()) { | 502 | if (std::holds_alternative<SingleBranch>(*block_info.branch)) { |
| 503 | const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); | ||
| 504 | if (branch->condition.IsUnconditional()) { | ||
| 505 | return true; | ||
| 506 | } | ||
| 507 | const u32 fallthrough_address = parse_info.end_address + 1; | ||
| 508 | state.inspect_queries.push_front(fallthrough_address); | ||
| 354 | return true; | 509 | return true; |
| 355 | } | 510 | } |
| 356 | |||
| 357 | const u32 fallthrough_address = parse_info.end_address + 1; | ||
| 358 | state.inspect_queries.push_front(fallthrough_address); | ||
| 359 | return true; | 511 | return true; |
| 360 | } | 512 | } |
| 361 | 513 | ||
| @@ -393,31 +545,42 @@ bool TryQuery(CFGRebuildState& state) { | |||
| 393 | state.queries.pop_front(); | 545 | state.queries.pop_front(); |
| 394 | gather_labels(q2.ssy_stack, state.ssy_labels, block); | 546 | gather_labels(q2.ssy_stack, state.ssy_labels, block); |
| 395 | gather_labels(q2.pbk_stack, state.pbk_labels, block); | 547 | gather_labels(q2.pbk_stack, state.pbk_labels, block); |
| 396 | if (!block.branch.condition.IsUnconditional()) { | 548 | if (std::holds_alternative<SingleBranch>(*block.branch)) { |
| 397 | q2.address = block.end + 1; | 549 | const auto branch = std::get_if<SingleBranch>(block.branch.get()); |
| 398 | state.queries.push_back(q2); | 550 | if (!branch->condition.IsUnconditional()) { |
| 399 | } | 551 | q2.address = block.end + 1; |
| 552 | state.queries.push_back(q2); | ||
| 553 | } | ||
| 400 | 554 | ||
| 401 | Query conditional_query{q2}; | 555 | Query conditional_query{q2}; |
| 402 | if (block.branch.is_sync) { | 556 | if (branch->is_sync) { |
| 403 | if (block.branch.address == unassigned_branch) { | 557 | if (branch->address == unassigned_branch) { |
| 404 | block.branch.address = conditional_query.ssy_stack.top(); | 558 | branch->address = conditional_query.ssy_stack.top(); |
| 559 | } | ||
| 560 | conditional_query.ssy_stack.pop(); | ||
| 405 | } | 561 | } |
| 406 | conditional_query.ssy_stack.pop(); | 562 | if (branch->is_brk) { |
| 407 | } | 563 | if (branch->address == unassigned_branch) { |
| 408 | if (block.branch.is_brk) { | 564 | branch->address = conditional_query.pbk_stack.top(); |
| 409 | if (block.branch.address == unassigned_branch) { | 565 | } |
| 410 | block.branch.address = conditional_query.pbk_stack.top(); | 566 | conditional_query.pbk_stack.pop(); |
| 411 | } | 567 | } |
| 412 | conditional_query.pbk_stack.pop(); | 568 | conditional_query.address = branch->address; |
| 569 | state.queries.push_back(std::move(conditional_query)); | ||
| 570 | return true; | ||
| 571 | } | ||
| 572 | const auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||
| 573 | for (const auto& branch_case : multi_branch->branches) { | ||
| 574 | Query conditional_query{q2}; | ||
| 575 | conditional_query.address = branch_case.address; | ||
| 576 | state.queries.push_back(std::move(conditional_query)); | ||
| 413 | } | 577 | } |
| 414 | conditional_query.address = block.branch.address; | ||
| 415 | state.queries.push_back(std::move(conditional_query)); | ||
| 416 | return true; | 578 | return true; |
| 417 | } | 579 | } |
| 580 | |||
| 418 | } // Anonymous namespace | 581 | } // Anonymous namespace |
| 419 | 582 | ||
| 420 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { | 583 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { |
| 421 | const auto get_expr = ([&](const Condition& cond) -> Expr { | 584 | const auto get_expr = ([&](const Condition& cond) -> Expr { |
| 422 | Expr result{}; | 585 | Expr result{}; |
| 423 | if (cond.cc != ConditionCode::T) { | 586 | if (cond.cc != ConditionCode::T) { |
| @@ -444,15 +607,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { | |||
| 444 | } | 607 | } |
| 445 | return MakeExpr<ExprBoolean>(true); | 608 | return MakeExpr<ExprBoolean>(true); |
| 446 | }); | 609 | }); |
| 447 | if (branch.address < 0) { | 610 | if (std::holds_alternative<SingleBranch>(*branch_info)) { |
| 448 | if (branch.kill) { | 611 | const auto branch = std::get_if<SingleBranch>(branch_info.get()); |
| 449 | mm.InsertReturn(get_expr(branch.condition), true); | 612 | if (branch->address < 0) { |
| 613 | if (branch->kill) { | ||
| 614 | mm.InsertReturn(get_expr(branch->condition), true); | ||
| 615 | return; | ||
| 616 | } | ||
| 617 | mm.InsertReturn(get_expr(branch->condition), false); | ||
| 450 | return; | 618 | return; |
| 451 | } | 619 | } |
| 452 | mm.InsertReturn(get_expr(branch.condition), false); | 620 | mm.InsertGoto(get_expr(branch->condition), branch->address); |
| 453 | return; | 621 | return; |
| 454 | } | 622 | } |
| 455 | mm.InsertGoto(get_expr(branch.condition), branch.address); | 623 | const auto multi_branch = std::get_if<MultiBranch>(branch_info.get()); |
| 624 | for (const auto& branch_case : multi_branch->branches) { | ||
| 625 | mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), | ||
| 626 | branch_case.address); | ||
| 627 | } | ||
| 456 | } | 628 | } |
| 457 | 629 | ||
| 458 | void DecompileShader(CFGRebuildState& state) { | 630 | void DecompileShader(CFGRebuildState& state) { |
| @@ -464,25 +636,26 @@ void DecompileShader(CFGRebuildState& state) { | |||
| 464 | if (state.labels.count(block.start) != 0) { | 636 | if (state.labels.count(block.start) != 0) { |
| 465 | state.manager->InsertLabel(block.start); | 637 | state.manager->InsertLabel(block.start); |
| 466 | } | 638 | } |
| 467 | u32 end = block.branch.ignore ? block.end + 1 : block.end; | 639 | const bool ignore = BlockBranchIsIgnored(block.branch); |
| 640 | u32 end = ignore ? block.end + 1 : block.end; | ||
| 468 | state.manager->InsertBlock(block.start, end); | 641 | state.manager->InsertBlock(block.start, end); |
| 469 | if (!block.branch.ignore) { | 642 | if (!ignore) { |
| 470 | InsertBranch(*state.manager, block.branch); | 643 | InsertBranch(*state.manager, block.branch); |
| 471 | } | 644 | } |
| 472 | } | 645 | } |
| 473 | state.manager->Decompile(); | 646 | state.manager->Decompile(); |
| 474 | } | 647 | } |
| 475 | 648 | ||
| 476 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, | 649 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 477 | u32 start_address, | 650 | const CompilerSettings& settings, |
| 478 | const CompilerSettings& settings) { | 651 | ConstBufferLocker& locker) { |
| 479 | auto result_out = std::make_unique<ShaderCharacteristics>(); | 652 | auto result_out = std::make_unique<ShaderCharacteristics>(); |
| 480 | if (settings.depth == CompileDepth::BruteForce) { | 653 | if (settings.depth == CompileDepth::BruteForce) { |
| 481 | result_out->settings.depth = CompileDepth::BruteForce; | 654 | result_out->settings.depth = CompileDepth::BruteForce; |
| 482 | return result_out; | 655 | return result_out; |
| 483 | } | 656 | } |
| 484 | 657 | ||
| 485 | CFGRebuildState state{program_code, program_size, start_address}; | 658 | CFGRebuildState state{program_code, start_address, locker}; |
| 486 | // Inspect Code and generate blocks | 659 | // Inspect Code and generate blocks |
| 487 | state.labels.clear(); | 660 | state.labels.clear(); |
| 488 | state.labels.emplace(start_address); | 661 | state.labels.emplace(start_address); |
| @@ -547,11 +720,9 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | |||
| 547 | ShaderBlock new_block{}; | 720 | ShaderBlock new_block{}; |
| 548 | new_block.start = block.start; | 721 | new_block.start = block.start; |
| 549 | new_block.end = block.end; | 722 | new_block.end = block.end; |
| 550 | new_block.ignore_branch = block.branch.ignore; | 723 | new_block.ignore_branch = BlockBranchIsIgnored(block.branch); |
| 551 | if (!new_block.ignore_branch) { | 724 | if (!new_block.ignore_branch) { |
| 552 | new_block.branch.cond = block.branch.condition; | 725 | new_block.branch = block.branch; |
| 553 | new_block.branch.kills = block.branch.kill; | ||
| 554 | new_block.branch.address = block.branch.address; | ||
| 555 | } | 726 | } |
| 556 | result_out->end = std::max(result_out->end, block.end); | 727 | result_out->end = std::max(result_out->end, block.end); |
| 557 | result_out->blocks.push_back(new_block); | 728 | result_out->blocks.push_back(new_block); |
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h index 74e54a5c7..5304998b9 100644 --- a/src/video_core/shader/control_flow.h +++ b/src/video_core/shader/control_flow.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <list> | 7 | #include <list> |
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <set> | 9 | #include <set> |
| 10 | #include <variant> | ||
| 10 | 11 | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | 12 | #include "video_core/engines/shader_bytecode.h" |
| 12 | #include "video_core/shader/ast.h" | 13 | #include "video_core/shader/ast.h" |
| @@ -37,29 +38,61 @@ struct Condition { | |||
| 37 | } | 38 | } |
| 38 | }; | 39 | }; |
| 39 | 40 | ||
| 40 | struct ShaderBlock { | 41 | class SingleBranch { |
| 41 | struct Branch { | 42 | public: |
| 42 | Condition cond{}; | 43 | SingleBranch() = default; |
| 43 | bool kills{}; | 44 | SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, |
| 44 | s32 address{}; | 45 | bool ignore) |
| 46 | : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, | ||
| 47 | ignore{ignore} {} | ||
| 48 | |||
| 49 | bool operator==(const SingleBranch& b) const { | ||
| 50 | return std::tie(condition, address, kill, is_sync, is_brk, ignore) == | ||
| 51 | std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); | ||
| 52 | } | ||
| 53 | |||
| 54 | bool operator!=(const SingleBranch& b) const { | ||
| 55 | return !operator==(b); | ||
| 56 | } | ||
| 57 | |||
| 58 | Condition condition{}; | ||
| 59 | s32 address{exit_branch}; | ||
| 60 | bool kill{}; | ||
| 61 | bool is_sync{}; | ||
| 62 | bool is_brk{}; | ||
| 63 | bool ignore{}; | ||
| 64 | }; | ||
| 45 | 65 | ||
| 46 | bool operator==(const Branch& b) const { | 66 | struct CaseBranch { |
| 47 | return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); | 67 | CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} |
| 48 | } | 68 | u32 cmp_value; |
| 69 | u32 address; | ||
| 70 | }; | ||
| 71 | |||
| 72 | class MultiBranch { | ||
| 73 | public: | ||
| 74 | MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches) | ||
| 75 | : gpr{gpr}, branches{std::move(branches)} {} | ||
| 76 | |||
| 77 | u32 gpr{}; | ||
| 78 | std::vector<CaseBranch> branches{}; | ||
| 79 | }; | ||
| 80 | |||
| 81 | using BranchData = std::variant<SingleBranch, MultiBranch>; | ||
| 82 | using BlockBranchInfo = std::shared_ptr<BranchData>; | ||
| 49 | 83 | ||
| 50 | bool operator!=(const Branch& b) const { | 84 | bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); |
| 51 | return !operator==(b); | ||
| 52 | } | ||
| 53 | }; | ||
| 54 | 85 | ||
| 86 | struct ShaderBlock { | ||
| 55 | u32 start{}; | 87 | u32 start{}; |
| 56 | u32 end{}; | 88 | u32 end{}; |
| 57 | bool ignore_branch{}; | 89 | bool ignore_branch{}; |
| 58 | Branch branch{}; | 90 | BlockBranchInfo branch{}; |
| 59 | 91 | ||
| 60 | bool operator==(const ShaderBlock& sb) const { | 92 | bool operator==(const ShaderBlock& sb) const { |
| 61 | return std::tie(start, end, ignore_branch, branch) == | 93 | return std::tie(start, end, ignore_branch) == |
| 62 | std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); | 94 | std::tie(sb.start, sb.end, sb.ignore_branch) && |
| 95 | BlockBranchInfoAreEqual(branch, sb.branch); | ||
| 63 | } | 96 | } |
| 64 | 97 | ||
| 65 | bool operator!=(const ShaderBlock& sb) const { | 98 | bool operator!=(const ShaderBlock& sb) const { |
| @@ -76,8 +109,8 @@ struct ShaderCharacteristics { | |||
| 76 | CompilerSettings settings{}; | 109 | CompilerSettings settings{}; |
| 77 | }; | 110 | }; |
| 78 | 111 | ||
| 79 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 program_size, | 112 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, |
| 80 | u32 start_address, | 113 | const CompilerSettings& settings, |
| 81 | const CompilerSettings& settings); | 114 | ConstBufferLocker& locker); |
| 82 | 115 | ||
| 83 | } // namespace VideoCommon::Shader | 116 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2626b1616..21fb9cb83 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 33 | return (absolute_offset % SchedPeriod) == 0; | 33 | return (absolute_offset % SchedPeriod) == 0; |
| 34 | } | 34 | } |
| 35 | 35 | ||
| 36 | } // namespace | 36 | } // Anonymous namespace |
| 37 | 37 | ||
| 38 | class ASTDecoder { | 38 | class ASTDecoder { |
| 39 | public: | 39 | public: |
| @@ -102,7 +102,7 @@ void ShaderIR::Decode() { | |||
| 102 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | 102 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |
| 103 | 103 | ||
| 104 | decompiled = false; | 104 | decompiled = false; |
| 105 | auto info = ScanFlow(program_code, program_size, main_offset, settings); | 105 | auto info = ScanFlow(program_code, main_offset, settings, locker); |
| 106 | auto& shader_info = *info; | 106 | auto& shader_info = *info; |
| 107 | coverage_begin = shader_info.start; | 107 | coverage_begin = shader_info.start; |
| 108 | coverage_end = shader_info.end; | 108 | coverage_end = shader_info.end; |
| @@ -155,7 +155,7 @@ void ShaderIR::Decode() { | |||
| 155 | [[fallthrough]]; | 155 | [[fallthrough]]; |
| 156 | case CompileDepth::BruteForce: { | 156 | case CompileDepth::BruteForce: { |
| 157 | coverage_begin = main_offset; | 157 | coverage_begin = main_offset; |
| 158 | const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); | 158 | const std::size_t shader_end = program_code.size(); |
| 159 | coverage_end = shader_end; | 159 | coverage_end = shader_end; |
| 160 | for (u32 label = main_offset; label < shader_end; label++) { | 160 | for (u32 label = main_offset; label < shader_end; label++) { |
| 161 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | 161 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); |
| @@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | |||
| 198 | } | 198 | } |
| 199 | return result; | 199 | return result; |
| 200 | }; | 200 | }; |
| 201 | if (block.branch.address < 0) { | 201 | if (std::holds_alternative<SingleBranch>(*block.branch)) { |
| 202 | if (block.branch.kills) { | 202 | auto branch = std::get_if<SingleBranch>(block.branch.get()); |
| 203 | Node n = Operation(OperationCode::Discard); | 203 | if (branch->address < 0) { |
| 204 | n = apply_conditions(block.branch.cond, n); | 204 | if (branch->kill) { |
| 205 | Node n = Operation(OperationCode::Discard); | ||
| 206 | n = apply_conditions(branch->condition, n); | ||
| 207 | bb.push_back(n); | ||
| 208 | global_code.push_back(n); | ||
| 209 | return; | ||
| 210 | } | ||
| 211 | Node n = Operation(OperationCode::Exit); | ||
| 212 | n = apply_conditions(branch->condition, n); | ||
| 205 | bb.push_back(n); | 213 | bb.push_back(n); |
| 206 | global_code.push_back(n); | 214 | global_code.push_back(n); |
| 207 | return; | 215 | return; |
| 208 | } | 216 | } |
| 209 | Node n = Operation(OperationCode::Exit); | 217 | Node n = Operation(OperationCode::Branch, Immediate(branch->address)); |
| 210 | n = apply_conditions(block.branch.cond, n); | 218 | n = apply_conditions(branch->condition, n); |
| 211 | bb.push_back(n); | 219 | bb.push_back(n); |
| 212 | global_code.push_back(n); | 220 | global_code.push_back(n); |
| 213 | return; | 221 | return; |
| 214 | } | 222 | } |
| 215 | Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); | 223 | auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); |
| 216 | n = apply_conditions(block.branch.cond, n); | 224 | Node op_a = GetRegister(multi_branch->gpr); |
| 217 | bb.push_back(n); | 225 | for (auto& branch_case : multi_branch->branches) { |
| 218 | global_code.push_back(n); | 226 | Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); |
| 227 | Node op_b = Immediate(branch_case.cmp_value); | ||
| 228 | Node condition = | ||
| 229 | GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); | ||
| 230 | auto result = Conditional(condition, {n}); | ||
| 231 | bb.push_back(result); | ||
| 232 | global_code.push_back(result); | ||
| 233 | } | ||
| 219 | } | 234 | } |
| 220 | 235 | ||
| 221 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | 236 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { |
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index b73f6536e..a33d242e9 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp | |||
| @@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | |||
| 144 | case OpCode::Id::ICMP_IMM: { | 144 | case OpCode::Id::ICMP_IMM: { |
| 145 | const Node zero = Immediate(0); | 145 | const Node zero = Immediate(0); |
| 146 | 146 | ||
| 147 | const auto [op_b, test] = [&]() -> std::pair<Node, Node> { | 147 | const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { |
| 148 | switch (opcode->get().GetId()) { | 148 | switch (opcode->get().GetId()) { |
| 149 | case OpCode::Id::ICMP_CR: | 149 | case OpCode::Id::ICMP_CR: |
| 150 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), | 150 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), |
| @@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | |||
| 161 | return {zero, zero}; | 161 | return {zero, zero}; |
| 162 | } | 162 | } |
| 163 | }(); | 163 | }(); |
| 164 | const Node op_a = GetRegister(instr.gpr8); | 164 | const Node op_lhs = GetRegister(instr.gpr8); |
| 165 | const Node comparison = | 165 | const Node comparison = |
| 166 | GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); | 166 | GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); |
| 167 | SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b)); | 167 | SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); |
| 168 | break; | 168 | break; |
| 169 | } | 169 | } |
| 170 | case OpCode::Id::LOP_C: | 170 | case OpCode::Id::LOP_C: |
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 95ec1cdd9..b02d2cb95 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp | |||
| @@ -144,8 +144,8 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 144 | 144 | ||
| 145 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | 145 | Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { |
| 146 | const auto offset{static_cast<std::size_t>(image.index.Value())}; | 146 | const auto offset{static_cast<std::size_t>(image.index.Value())}; |
| 147 | if (const auto image = TryUseExistingImage(offset, type)) { | 147 | if (const auto existing_image = TryUseExistingImage(offset, type)) { |
| 148 | return *image; | 148 | return *existing_image; |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | const std::size_t next_index{used_images.size()}; | 151 | const std::size_t next_index{used_images.size()}; |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7923d4d69..335d78146 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 166 | }(); | 166 | }(); |
| 167 | 167 | ||
| 168 | const auto [real_address_base, base_address, descriptor] = | 168 | const auto [real_address_base, base_address, descriptor] = |
| 169 | TrackAndGetGlobalMemory(bb, instr, false); | 169 | TrackGlobalMemory(bb, instr, false); |
| 170 | 170 | ||
| 171 | const u32 count = GetUniformTypeElementsCount(type); | 171 | const u32 count = GetUniformTypeElementsCount(type); |
| 172 | if (!real_address_base || !base_address) { | ||
| 173 | // Tracking failed, load zeroes. | ||
| 174 | for (u32 i = 0; i < count; ++i) { | ||
| 175 | SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); | ||
| 176 | } | ||
| 177 | break; | ||
| 178 | } | ||
| 179 | |||
| 172 | for (u32 i = 0; i < count; ++i) { | 180 | for (u32 i = 0; i < count; ++i) { |
| 173 | const Node it_offset = Immediate(i * 4); | 181 | const Node it_offset = Immediate(i * 4); |
| 174 | const Node real_address = | 182 | const Node real_address = |
| @@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 260 | }(); | 268 | }(); |
| 261 | 269 | ||
| 262 | const auto [real_address_base, base_address, descriptor] = | 270 | const auto [real_address_base, base_address, descriptor] = |
| 263 | TrackAndGetGlobalMemory(bb, instr, true); | 271 | TrackGlobalMemory(bb, instr, true); |
| 264 | 272 | if (!real_address_base || !base_address) { | |
| 265 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | 273 | // Tracking failed, skip the store. |
| 266 | SetTemporary(bb, 0, real_address_base); | 274 | break; |
| 275 | } | ||
| 267 | 276 | ||
| 268 | const u32 count = GetUniformTypeElementsCount(type); | 277 | const u32 count = GetUniformTypeElementsCount(type); |
| 269 | for (u32 i = 0; i < count; ++i) { | 278 | for (u32 i = 0; i < count; ++i) { |
| 270 | SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | ||
| 271 | } | ||
| 272 | for (u32 i = 0; i < count; ++i) { | ||
| 273 | const Node it_offset = Immediate(i * 4); | 279 | const Node it_offset = Immediate(i * 4); |
| 274 | const Node real_address = | 280 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); |
| 275 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | ||
| 276 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 281 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 277 | 282 | const Node value = GetRegister(instr.gpr0.Value() + i); | |
| 278 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1))); | 283 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); |
| 279 | } | 284 | } |
| 280 | break; | 285 | break; |
| 281 | } | 286 | } |
| @@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 301 | return pc; | 306 | return pc; |
| 302 | } | 307 | } |
| 303 | 308 | ||
| 304 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, | 309 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, |
| 305 | Instruction instr, | 310 | Instruction instr, |
| 306 | bool is_write) { | 311 | bool is_write) { |
| 307 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | 312 | const auto addr_register{GetRegister(instr.gmem.gpr)}; |
| 308 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | 313 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; |
| 309 | 314 | ||
| 310 | const auto [base_address, index, offset] = | 315 | const auto [base_address, index, offset] = |
| 311 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); | 316 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); |
| 312 | ASSERT(base_address != nullptr); | 317 | ASSERT_OR_EXECUTE_MSG(base_address != nullptr, |
| 318 | { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, | ||
| 319 | "Global memory tracking failed"); | ||
| 313 | 320 | ||
| 314 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); | 321 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); |
| 315 | 322 | ||
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d46e0f823..116b95f76 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 67 | break; | 67 | break; |
| 68 | } | 68 | } |
| 69 | case OpCode::Id::MOV_SYS: { | 69 | case OpCode::Id::MOV_SYS: { |
| 70 | const Node value = [&]() { | 70 | const Node value = [this, instr] { |
| 71 | switch (instr.sys20) { | 71 | switch (instr.sys20) { |
| 72 | case SystemVariable::Ydirection: | 72 | case SystemVariable::Ydirection: |
| 73 | return Operation(OperationCode::YNegate); | 73 | return Operation(OperationCode::YNegate); |
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index f6ee68a54..d419e9c45 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp | |||
| @@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { | |||
| 18 | const auto opcode = OpCode::Decode(instr); | 18 | const auto opcode = OpCode::Decode(instr); |
| 19 | 19 | ||
| 20 | Node op_a = GetRegister(instr.gpr8); | 20 | Node op_a = GetRegister(instr.gpr8); |
| 21 | Node op_b = [&]() { | 21 | Node op_b = [this, instr] { |
| 22 | if (instr.is_b_imm) { | 22 | if (instr.is_b_imm) { |
| 23 | return Immediate(instr.alu.GetSignedImm20_20()); | 23 | return Immediate(instr.alu.GetSignedImm20_20()); |
| 24 | } else if (instr.is_b_gpr) { | 24 | } else if (instr.is_b_gpr) { |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b934a069..d61e656b7 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 141 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | 141 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); |
| 142 | 142 | ||
| 143 | const auto& sampler = | 143 | const auto& sampler = |
| 144 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | 144 | GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); |
| 145 | 145 | ||
| 146 | Node4 values; | 146 | Node4 values; |
| 147 | for (u32 element = 0; element < values.size(); ++element) { | 147 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -150,7 +150,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 150 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 150 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 151 | } | 151 | } |
| 152 | 152 | ||
| 153 | WriteTexsInstructionFloat(bb, instr, values); | 153 | WriteTexsInstructionFloat(bb, instr, values, true); |
| 154 | break; | 154 | break; |
| 155 | } | 155 | } |
| 156 | case OpCode::Id::TXQ_B: | 156 | case OpCode::Id::TXQ_B: |
| @@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 165 | // Sadly, not all texture instructions specify the type of texture their sampler | 165 | // Sadly, not all texture instructions specify the type of texture their sampler |
| 166 | // uses. This must be fixed at a later instance. | 166 | // uses. This must be fixed at a later instance. |
| 167 | const auto& sampler = | 167 | const auto& sampler = |
| 168 | is_bindless | 168 | is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); |
| 169 | ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, | ||
| 170 | false) | ||
| 171 | : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 172 | 169 | ||
| 173 | u32 indexer = 0; | 170 | u32 indexer = 0; |
| 174 | switch (instr.txq.query_type) { | 171 | switch (instr.txq.query_type) { |
| @@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 207 | 204 | ||
| 208 | auto texture_type = instr.tmml.texture_type.Value(); | 205 | auto texture_type = instr.tmml.texture_type.Value(); |
| 209 | const bool is_array = instr.tmml.array != 0; | 206 | const bool is_array = instr.tmml.array != 0; |
| 210 | const auto& sampler = is_bindless | 207 | const auto& sampler = |
| 211 | ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) | 208 | is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) |
| 212 | : GetSampler(instr.sampler, texture_type, is_array, false); | 209 | : GetSampler(instr.sampler, {{texture_type, is_array, false}}); |
| 213 | 210 | ||
| 214 | std::vector<Node> coords; | 211 | std::vector<Node> coords; |
| 215 | 212 | ||
| @@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 285 | return pc; | 282 | return pc; |
| 286 | } | 283 | } |
| 287 | 284 | ||
| 288 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | 285 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, |
| 289 | bool is_array, bool is_shadow) { | 286 | std::optional<SamplerInfo> sampler_info) { |
| 290 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | 287 | const auto offset = static_cast<u32>(sampler.index.Value()); |
| 288 | |||
| 289 | Tegra::Shader::TextureType type; | ||
| 290 | bool is_array; | ||
| 291 | bool is_shadow; | ||
| 292 | if (sampler_info) { | ||
| 293 | type = sampler_info->type; | ||
| 294 | is_array = sampler_info->is_array; | ||
| 295 | is_shadow = sampler_info->is_shadow; | ||
| 296 | } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) { | ||
| 297 | type = sampler->texture_type.Value(); | ||
| 298 | is_array = sampler->is_array.Value() != 0; | ||
| 299 | is_shadow = sampler->is_shadow.Value() != 0; | ||
| 300 | } else { | ||
| 301 | type = Tegra::Shader::TextureType::Texture2D; | ||
| 302 | is_array = false; | ||
| 303 | is_shadow = false; | ||
| 304 | } | ||
| 291 | 305 | ||
| 292 | // If this sampler has already been used, return the existing mapping. | 306 | // If this sampler has already been used, return the existing mapping. |
| 293 | const auto itr = | 307 | const auto itr = |
| @@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 303 | const std::size_t next_index = used_samplers.size(); | 317 | const std::size_t next_index = used_samplers.size(); |
| 304 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; | 318 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; |
| 305 | return *used_samplers.emplace(entry).first; | 319 | return *used_samplers.emplace(entry).first; |
| 306 | } | 320 | } // namespace VideoCommon::Shader |
| 307 | 321 | ||
| 308 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | 322 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, |
| 309 | bool is_array, bool is_shadow) { | 323 | std::optional<SamplerInfo> sampler_info) { |
| 310 | const Node sampler_register = GetRegister(reg); | 324 | const Node sampler_register = GetRegister(reg); |
| 311 | const auto [base_sampler, cbuf_index, cbuf_offset] = | 325 | const auto [base_sampler, cbuf_index, cbuf_offset] = |
| 312 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 326 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 313 | ASSERT(base_sampler != nullptr); | 327 | ASSERT(base_sampler != nullptr); |
| 314 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); | 328 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); |
| 329 | Tegra::Shader::TextureType type; | ||
| 330 | bool is_array; | ||
| 331 | bool is_shadow; | ||
| 332 | if (sampler_info) { | ||
| 333 | type = sampler_info->type; | ||
| 334 | is_array = sampler_info->is_array; | ||
| 335 | is_shadow = sampler_info->is_shadow; | ||
| 336 | } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) { | ||
| 337 | type = sampler->texture_type.Value(); | ||
| 338 | is_array = sampler->is_array.Value() != 0; | ||
| 339 | is_shadow = sampler->is_shadow.Value() != 0; | ||
| 340 | } else { | ||
| 341 | type = Tegra::Shader::TextureType::Texture2D; | ||
| 342 | is_array = false; | ||
| 343 | is_shadow = false; | ||
| 344 | } | ||
| 315 | 345 | ||
| 316 | // If this sampler has already been used, return the existing mapping. | 346 | // If this sampler has already been used, return the existing mapping. |
| 317 | const auto itr = | 347 | const auto itr = |
| @@ -344,14 +374,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const | |||
| 344 | } | 374 | } |
| 345 | } | 375 | } |
| 346 | 376 | ||
| 347 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | 377 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, |
| 348 | const Node4& components) { | 378 | bool ignore_mask) { |
| 349 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | 379 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle |
| 350 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | 380 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 |
| 351 | 381 | ||
| 352 | u32 dest_elem = 0; | 382 | u32 dest_elem = 0; |
| 353 | for (u32 component = 0; component < 4; ++component) { | 383 | for (u32 component = 0; component < 4; ++component) { |
| 354 | if (!instr.texs.IsComponentEnabled(component)) | 384 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) |
| 355 | continue; | 385 | continue; |
| 356 | SetTemporary(bb, dest_elem++, components[component]); | 386 | SetTemporary(bb, dest_elem++, components[component]); |
| 357 | } | 387 | } |
| @@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 411 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | 441 | (texture_type == TextureType::TextureCube && is_array && is_shadow), |
| 412 | "This method is not supported."); | 442 | "This method is not supported."); |
| 413 | 443 | ||
| 414 | const auto& sampler = is_bindless | 444 | const auto& sampler = |
| 415 | ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) | 445 | is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) |
| 416 | : GetSampler(instr.sampler, texture_type, is_array, is_shadow); | 446 | : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); |
| 417 | 447 | ||
| 418 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | 448 | const bool lod_needed = process_mode == TextureProcessMode::LZ || |
| 419 | process_mode == TextureProcessMode::LL || | 449 | process_mode == TextureProcessMode::LL || |
| @@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 577 | dc = GetRegister(parameter_register++); | 607 | dc = GetRegister(parameter_register++); |
| 578 | } | 608 | } |
| 579 | 609 | ||
| 580 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 610 | const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); |
| 581 | 611 | ||
| 582 | Node4 values; | 612 | Node4 values; |
| 583 | for (u32 element = 0; element < values.size(); ++element) { | 613 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||
| 610 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | 640 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; |
| 611 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | 641 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; |
| 612 | 642 | ||
| 613 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 643 | const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); |
| 614 | 644 | ||
| 615 | Node4 values; | 645 | Node4 values; |
| 616 | for (u32 element = 0; element < values.size(); ++element) { | 646 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 646 | // When lod is used always is in gpr20 | 676 | // When lod is used always is in gpr20 |
| 647 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | 677 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 648 | 678 | ||
| 649 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 679 | const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); |
| 650 | 680 | ||
| 651 | Node4 values; | 681 | Node4 values; |
| 652 | for (u32 element = 0; element < values.size(); ++element) { | 682 | for (u32 element = 0; element < values.size(); ++element) { |
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp index 97fc6f9b1..b047cf870 100644 --- a/src/video_core/shader/decode/video.cpp +++ b/src/video_core/shader/decode/video.cpp | |||
| @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { | |||
| 23 | const Node op_a = | 23 | const Node op_a = |
| 24 | GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, | 24 | GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, |
| 25 | instr.video.type_a, instr.video.byte_height_a); | 25 | instr.video.type_a, instr.video.byte_height_a); |
| 26 | const Node op_b = [&]() { | 26 | const Node op_b = [this, instr] { |
| 27 | if (instr.video.use_register_b) { | 27 | if (instr.video.use_register_b) { |
| 28 | return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, | 28 | return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, |
| 29 | instr.video.signed_b, instr.video.type_b, | 29 | instr.video.signed_b, instr.video.type_b, |
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index a8e481b3c..fa8a250cc 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -46,9 +46,10 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | |||
| 46 | break; | 46 | break; |
| 47 | } | 47 | } |
| 48 | case OpCode::Id::SHFL: { | 48 | case OpCode::Id::SHFL: { |
| 49 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | 49 | Node width = [this, instr] { |
| 50 | : GetRegister(instr.gpr39); | 50 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) |
| 51 | Node width = [&] { | 51 | : GetRegister(instr.gpr39); |
| 52 | |||
| 52 | // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has | 53 | // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has |
| 53 | // been done reversing Nvidia's math. It won't work on all cases due to SHFL having | 54 | // been done reversing Nvidia's math. It won't work on all cases due to SHFL having |
| 54 | // different parameters that don't properly map to GLSL's interface, but it should work | 55 | // different parameters that don't properly map to GLSL's interface, but it should work |
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h index d3dcd00ec..4e8264367 100644 --- a/src/video_core/shader/expr.h +++ b/src/video_core/shader/expr.h | |||
| @@ -17,13 +17,14 @@ using Tegra::Shader::Pred; | |||
| 17 | class ExprAnd; | 17 | class ExprAnd; |
| 18 | class ExprBoolean; | 18 | class ExprBoolean; |
| 19 | class ExprCondCode; | 19 | class ExprCondCode; |
| 20 | class ExprGprEqual; | ||
| 20 | class ExprNot; | 21 | class ExprNot; |
| 21 | class ExprOr; | 22 | class ExprOr; |
| 22 | class ExprPredicate; | 23 | class ExprPredicate; |
| 23 | class ExprVar; | 24 | class ExprVar; |
| 24 | 25 | ||
| 25 | using ExprData = | 26 | using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, |
| 26 | std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, ExprBoolean>; | 27 | ExprBoolean, ExprGprEqual>; |
| 27 | using Expr = std::shared_ptr<ExprData>; | 28 | using Expr = std::shared_ptr<ExprData>; |
| 28 | 29 | ||
| 29 | class ExprAnd final { | 30 | class ExprAnd final { |
| @@ -118,6 +119,22 @@ public: | |||
| 118 | bool value; | 119 | bool value; |
| 119 | }; | 120 | }; |
| 120 | 121 | ||
| 122 | class ExprGprEqual final { | ||
| 123 | public: | ||
| 124 | ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} | ||
| 125 | |||
| 126 | bool operator==(const ExprGprEqual& b) const { | ||
| 127 | return gpr == b.gpr && value == b.value; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool operator!=(const ExprGprEqual& b) const { | ||
| 131 | return !operator==(b); | ||
| 132 | } | ||
| 133 | |||
| 134 | u32 gpr; | ||
| 135 | u32 value; | ||
| 136 | }; | ||
| 137 | |||
| 121 | template <typename T, typename... Args> | 138 | template <typename T, typename... Args> |
| 122 | Expr MakeExpr(Args&&... args) { | 139 | Expr MakeExpr(Args&&... args) { |
| 123 | static_assert(std::is_convertible_v<T, ExprData>); | 140 | static_assert(std::is_convertible_v<T, ExprData>); |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 338bab17c..447fb5c1d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -410,7 +410,7 @@ public: | |||
| 410 | explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} | 410 | explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {} |
| 411 | 411 | ||
| 412 | explicit OperationNode(OperationCode code, Meta meta) | 412 | explicit OperationNode(OperationCode code, Meta meta) |
| 413 | : OperationNode(code, meta, std::vector<Node>{}) {} | 413 | : OperationNode(code, std::move(meta), std::vector<Node>{}) {} |
| 414 | 414 | ||
| 415 | explicit OperationNode(OperationCode code, std::vector<Node> operands) | 415 | explicit OperationNode(OperationCode code, std::vector<Node> operands) |
| 416 | : OperationNode(code, Meta{}, std::move(operands)) {} | 416 | : OperationNode(code, Meta{}, std::move(operands)) {} |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index c1f2b88c8..1d9825c76 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -2,8 +2,9 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 5 | #include <cmath> | 7 | #include <cmath> |
| 6 | #include <unordered_map> | ||
| 7 | 8 | ||
| 8 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| @@ -22,10 +23,9 @@ using Tegra::Shader::PredCondition; | |||
| 22 | using Tegra::Shader::PredOperation; | 23 | using Tegra::Shader::PredOperation; |
| 23 | using Tegra::Shader::Register; | 24 | using Tegra::Shader::Register; |
| 24 | 25 | ||
| 25 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, | 26 | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 26 | CompilerSettings settings) | 27 | ConstBufferLocker& locker) |
| 27 | : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, | 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { |
| 28 | program_manager{true, true}, settings{settings} { | ||
| 29 | Decode(); | 29 | Decode(); |
| 30 | } | 30 | } |
| 31 | 31 | ||
| @@ -271,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | |||
| 271 | } | 271 | } |
| 272 | 272 | ||
| 273 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | 273 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { |
| 274 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | 274 | static constexpr std::array comparison_table{ |
| 275 | {PredCondition::LessThan, OperationCode::LogicalFLessThan}, | 275 | std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan}, |
| 276 | {PredCondition::Equal, OperationCode::LogicalFEqual}, | 276 | std::pair{PredCondition::Equal, OperationCode::LogicalFEqual}, |
| 277 | {PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, | 277 | std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual}, |
| 278 | {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, | 278 | std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan}, |
| 279 | {PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, | 279 | std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual}, |
| 280 | {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, | 280 | std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual}, |
| 281 | {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, | 281 | std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan}, |
| 282 | {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, | 282 | std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual}, |
| 283 | {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, | 283 | std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual}, |
| 284 | {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, | 284 | std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan}, |
| 285 | {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}}; | 285 | std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}, |
| 286 | 286 | }; | |
| 287 | const auto comparison{PredicateComparisonTable.find(condition)}; | 287 | |
| 288 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 288 | const auto comparison = |
| 289 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 290 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 291 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 289 | "Unknown predicate comparison operation"); | 292 | "Unknown predicate comparison operation"); |
| 290 | 293 | ||
| 291 | Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); | 294 | Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b); |
| @@ -306,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N | |||
| 306 | 309 | ||
| 307 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, | 310 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, |
| 308 | Node op_b) { | 311 | Node op_b) { |
| 309 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | 312 | static constexpr std::array comparison_table{ |
| 310 | {PredCondition::LessThan, OperationCode::LogicalILessThan}, | 313 | std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan}, |
| 311 | {PredCondition::Equal, OperationCode::LogicalIEqual}, | 314 | std::pair{PredCondition::Equal, OperationCode::LogicalIEqual}, |
| 312 | {PredCondition::LessEqual, OperationCode::LogicalILessEqual}, | 315 | std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual}, |
| 313 | {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, | 316 | std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan}, |
| 314 | {PredCondition::NotEqual, OperationCode::LogicalINotEqual}, | 317 | std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual}, |
| 315 | {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, | 318 | std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual}, |
| 316 | {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, | 319 | std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan}, |
| 317 | {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, | 320 | std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual}, |
| 318 | {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, | 321 | std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual}, |
| 319 | {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, | 322 | std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan}, |
| 320 | {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}}; | 323 | std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}, |
| 321 | 324 | }; | |
| 322 | const auto comparison{PredicateComparisonTable.find(condition)}; | 325 | |
| 323 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 326 | const auto comparison = |
| 327 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 328 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 329 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 324 | "Unknown predicate comparison operation"); | 330 | "Unknown predicate comparison operation"); |
| 325 | 331 | ||
| 326 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), | 332 | Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), |
| @@ -337,36 +343,43 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si | |||
| 337 | 343 | ||
| 338 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, | 344 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, |
| 339 | Node op_b) { | 345 | Node op_b) { |
| 340 | const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = { | 346 | static constexpr std::array comparison_table{ |
| 341 | {PredCondition::LessThan, OperationCode::Logical2HLessThan}, | 347 | std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan}, |
| 342 | {PredCondition::Equal, OperationCode::Logical2HEqual}, | 348 | std::pair{PredCondition::Equal, OperationCode::Logical2HEqual}, |
| 343 | {PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, | 349 | std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual}, |
| 344 | {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, | 350 | std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan}, |
| 345 | {PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, | 351 | std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual}, |
| 346 | {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, | 352 | std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual}, |
| 347 | {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, | 353 | std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan}, |
| 348 | {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, | 354 | std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan}, |
| 349 | {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, | 355 | std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan}, |
| 350 | {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, | 356 | std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan}, |
| 351 | {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}}; | 357 | std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}, |
| 352 | 358 | }; | |
| 353 | const auto comparison{PredicateComparisonTable.find(condition)}; | 359 | |
| 354 | UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(), | 360 | const auto comparison = |
| 361 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 362 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 363 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 355 | "Unknown predicate comparison operation"); | 364 | "Unknown predicate comparison operation"); |
| 356 | 365 | ||
| 357 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); | 366 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); |
| 358 | } | 367 | } |
| 359 | 368 | ||
| 360 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | 369 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { |
| 361 | const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { | 370 | static constexpr std::array operation_table{ |
| 362 | {PredOperation::And, OperationCode::LogicalAnd}, | 371 | OperationCode::LogicalAnd, |
| 363 | {PredOperation::Or, OperationCode::LogicalOr}, | 372 | OperationCode::LogicalOr, |
| 364 | {PredOperation::Xor, OperationCode::LogicalXor}, | 373 | OperationCode::LogicalXor, |
| 365 | }; | 374 | }; |
| 366 | 375 | ||
| 367 | const auto op = PredicateOperationTable.find(operation); | 376 | const auto index = static_cast<std::size_t>(operation); |
| 368 | UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation"); | 377 | if (index >= operation_table.size()) { |
| 369 | return op->second; | 378 | UNIMPLEMENTED_MSG("Unknown predicate operation."); |
| 379 | return {}; | ||
| 380 | } | ||
| 381 | |||
| 382 | return operation_table[index]; | ||
| 370 | } | 383 | } |
| 371 | 384 | ||
| 372 | Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { | 385 | Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 105981d67..1fd44bde1 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "video_core/engines/shader_header.h" | 17 | #include "video_core/engines/shader_header.h" |
| 18 | #include "video_core/shader/ast.h" | 18 | #include "video_core/shader/ast.h" |
| 19 | #include "video_core/shader/compiler_settings.h" | 19 | #include "video_core/shader/compiler_settings.h" |
| 20 | #include "video_core/shader/const_buffer_locker.h" | ||
| 20 | #include "video_core/shader/node.h" | 21 | #include "video_core/shader/node.h" |
| 21 | 22 | ||
| 22 | namespace VideoCommon::Shader { | 23 | namespace VideoCommon::Shader { |
| @@ -66,8 +67,8 @@ struct GlobalMemoryUsage { | |||
| 66 | 67 | ||
| 67 | class ShaderIR final { | 68 | class ShaderIR final { |
| 68 | public: | 69 | public: |
| 69 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, | 70 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, |
| 70 | CompilerSettings settings); | 71 | ConstBufferLocker& locker); |
| 71 | ~ShaderIR(); | 72 | ~ShaderIR(); |
| 72 | 73 | ||
| 73 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | 74 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { |
| @@ -172,6 +173,13 @@ public: | |||
| 172 | 173 | ||
| 173 | private: | 174 | private: |
| 174 | friend class ASTDecoder; | 175 | friend class ASTDecoder; |
| 176 | |||
| 177 | struct SamplerInfo { | ||
| 178 | Tegra::Shader::TextureType type; | ||
| 179 | bool is_array; | ||
| 180 | bool is_shadow; | ||
| 181 | }; | ||
| 182 | |||
| 175 | void Decode(); | 183 | void Decode(); |
| 176 | 184 | ||
| 177 | NodeBlock DecodeRange(u32 begin, u32 end); | 185 | NodeBlock DecodeRange(u32 begin, u32 end); |
| @@ -296,12 +304,11 @@ private: | |||
| 296 | 304 | ||
| 297 | /// Accesses a texture sampler | 305 | /// Accesses a texture sampler |
| 298 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, | 306 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, |
| 299 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow); | 307 | std::optional<SamplerInfo> sampler_info); |
| 300 | 308 | ||
| 301 | // Accesses a texture sampler for a bindless texture. | 309 | // Accesses a texture sampler for a bindless texture. |
| 302 | const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, | 310 | const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, |
| 303 | Tegra::Shader::TextureType type, bool is_array, | 311 | std::optional<SamplerInfo> sampler_info); |
| 304 | bool is_shadow); | ||
| 305 | 312 | ||
| 306 | /// Accesses an image. | 313 | /// Accesses an image. |
| 307 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | 314 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); |
| @@ -322,7 +329,7 @@ private: | |||
| 322 | const Node4& components); | 329 | const Node4& components); |
| 323 | 330 | ||
| 324 | void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | 331 | void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |
| 325 | const Node4& components); | 332 | const Node4& components, bool ignore_mask = false); |
| 326 | void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | 333 | void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, |
| 327 | const Node4& components); | 334 | const Node4& components); |
| 328 | 335 | ||
| @@ -371,12 +378,15 @@ private: | |||
| 371 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, | 378 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, |
| 372 | s64 cursor) const; | 379 | s64 cursor) const; |
| 373 | 380 | ||
| 374 | std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( | 381 | std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, |
| 375 | NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); | 382 | Tegra::Shader::Instruction instr, |
| 383 | bool is_write); | ||
| 376 | 384 | ||
| 377 | const ProgramCode& program_code; | 385 | const ProgramCode& program_code; |
| 378 | const u32 main_offset; | 386 | const u32 main_offset; |
| 379 | const std::size_t program_size; | 387 | const CompilerSettings settings; |
| 388 | ConstBufferLocker& locker; | ||
| 389 | |||
| 380 | bool decompiled{}; | 390 | bool decompiled{}; |
| 381 | bool disable_flow_stack{}; | 391 | bool disable_flow_stack{}; |
| 382 | 392 | ||
| @@ -385,8 +395,7 @@ private: | |||
| 385 | 395 | ||
| 386 | std::map<u32, NodeBlock> basic_blocks; | 396 | std::map<u32, NodeBlock> basic_blocks; |
| 387 | NodeBlock global_code; | 397 | NodeBlock global_code; |
| 388 | ASTManager program_manager; | 398 | ASTManager program_manager{true, true}; |
| 389 | CompilerSettings settings{}; | ||
| 390 | 399 | ||
| 391 | std::set<u32> used_registers; | 400 | std::set<u32> used_registers; |
| 392 | std::set<Tegra::Shader::Pred> used_predicates; | 401 | std::set<Tegra::Shader::Pred> used_predicates; |
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 250afc6d6..9a3c05288 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -212,6 +212,14 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, | |||
| 212 | break; | 212 | break; |
| 213 | } | 213 | } |
| 214 | break; | 214 | break; |
| 215 | case Tegra::Texture::TextureFormat::A4B4G4R4: | ||
| 216 | switch (component_type) { | ||
| 217 | case Tegra::Texture::ComponentType::UNORM: | ||
| 218 | return PixelFormat::R4G4B4A4U; | ||
| 219 | default: | ||
| 220 | break; | ||
| 221 | } | ||
| 222 | break; | ||
| 215 | case Tegra::Texture::TextureFormat::R8: | 223 | case Tegra::Texture::TextureFormat::R8: |
| 216 | switch (component_type) { | 224 | switch (component_type) { |
| 217 | case Tegra::Texture::ComponentType::UNORM: | 225 | case Tegra::Texture::ComponentType::UNORM: |
| @@ -252,6 +260,7 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, | |||
| 252 | default: | 260 | default: |
| 253 | break; | 261 | break; |
| 254 | } | 262 | } |
| 263 | break; | ||
| 255 | case Tegra::Texture::TextureFormat::R32_G32_B32_A32: | 264 | case Tegra::Texture::TextureFormat::R32_G32_B32_A32: |
| 256 | switch (component_type) { | 265 | switch (component_type) { |
| 257 | case Tegra::Texture::ComponentType::FLOAT: | 266 | case Tegra::Texture::ComponentType::FLOAT: |
| @@ -350,6 +359,16 @@ PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format, | |||
| 350 | return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5; | 359 | return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5; |
| 351 | case Tegra::Texture::TextureFormat::ASTC_2D_10X8: | 360 | case Tegra::Texture::TextureFormat::ASTC_2D_10X8: |
| 352 | return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8; | 361 | return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8; |
| 362 | case Tegra::Texture::TextureFormat::ASTC_2D_6X6: | ||
| 363 | return is_srgb ? PixelFormat::ASTC_2D_6X6_SRGB : PixelFormat::ASTC_2D_6X6; | ||
| 364 | case Tegra::Texture::TextureFormat::ASTC_2D_10X10: | ||
| 365 | return is_srgb ? PixelFormat::ASTC_2D_10X10_SRGB : PixelFormat::ASTC_2D_10X10; | ||
| 366 | case Tegra::Texture::TextureFormat::ASTC_2D_12X12: | ||
| 367 | return is_srgb ? PixelFormat::ASTC_2D_12X12_SRGB : PixelFormat::ASTC_2D_12X12; | ||
| 368 | case Tegra::Texture::TextureFormat::ASTC_2D_8X6: | ||
| 369 | return is_srgb ? PixelFormat::ASTC_2D_8X6_SRGB : PixelFormat::ASTC_2D_8X6; | ||
| 370 | case Tegra::Texture::TextureFormat::ASTC_2D_6X5: | ||
| 371 | return is_srgb ? PixelFormat::ASTC_2D_6X5_SRGB : PixelFormat::ASTC_2D_6X5; | ||
| 353 | case Tegra::Texture::TextureFormat::R16_G16: | 372 | case Tegra::Texture::TextureFormat::R16_G16: |
| 354 | switch (component_type) { | 373 | switch (component_type) { |
| 355 | case Tegra::Texture::ComponentType::FLOAT: | 374 | case Tegra::Texture::ComponentType::FLOAT: |
| @@ -510,6 +529,16 @@ bool IsPixelFormatASTC(PixelFormat format) { | |||
| 510 | case PixelFormat::ASTC_2D_8X5_SRGB: | 529 | case PixelFormat::ASTC_2D_8X5_SRGB: |
| 511 | case PixelFormat::ASTC_2D_10X8: | 530 | case PixelFormat::ASTC_2D_10X8: |
| 512 | case PixelFormat::ASTC_2D_10X8_SRGB: | 531 | case PixelFormat::ASTC_2D_10X8_SRGB: |
| 532 | case PixelFormat::ASTC_2D_6X6: | ||
| 533 | case PixelFormat::ASTC_2D_6X6_SRGB: | ||
| 534 | case PixelFormat::ASTC_2D_10X10: | ||
| 535 | case PixelFormat::ASTC_2D_10X10_SRGB: | ||
| 536 | case PixelFormat::ASTC_2D_12X12: | ||
| 537 | case PixelFormat::ASTC_2D_12X12_SRGB: | ||
| 538 | case PixelFormat::ASTC_2D_8X6: | ||
| 539 | case PixelFormat::ASTC_2D_8X6_SRGB: | ||
| 540 | case PixelFormat::ASTC_2D_6X5: | ||
| 541 | case PixelFormat::ASTC_2D_6X5_SRGB: | ||
| 513 | return true; | 542 | return true; |
| 514 | default: | 543 | default: |
| 515 | return false; | 544 | return false; |
| @@ -530,6 +559,11 @@ bool IsPixelFormatSRGB(PixelFormat format) { | |||
| 530 | case PixelFormat::ASTC_2D_5X4_SRGB: | 559 | case PixelFormat::ASTC_2D_5X4_SRGB: |
| 531 | case PixelFormat::ASTC_2D_5X5_SRGB: | 560 | case PixelFormat::ASTC_2D_5X5_SRGB: |
| 532 | case PixelFormat::ASTC_2D_10X8_SRGB: | 561 | case PixelFormat::ASTC_2D_10X8_SRGB: |
| 562 | case PixelFormat::ASTC_2D_6X6_SRGB: | ||
| 563 | case PixelFormat::ASTC_2D_10X10_SRGB: | ||
| 564 | case PixelFormat::ASTC_2D_12X12_SRGB: | ||
| 565 | case PixelFormat::ASTC_2D_8X6_SRGB: | ||
| 566 | case PixelFormat::ASTC_2D_6X5_SRGB: | ||
| 533 | return true; | 567 | return true; |
| 534 | default: | 568 | default: |
| 535 | return false; | 569 | return false; |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 1e1c432a5..97668f802 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -67,27 +67,38 @@ enum class PixelFormat { | |||
| 67 | DXT23_SRGB = 49, | 67 | DXT23_SRGB = 49, |
| 68 | DXT45_SRGB = 50, | 68 | DXT45_SRGB = 50, |
| 69 | BC7U_SRGB = 51, | 69 | BC7U_SRGB = 51, |
| 70 | ASTC_2D_4X4_SRGB = 52, | 70 | R4G4B4A4U = 52, |
| 71 | ASTC_2D_8X8_SRGB = 53, | 71 | ASTC_2D_4X4_SRGB = 53, |
| 72 | ASTC_2D_8X5_SRGB = 54, | 72 | ASTC_2D_8X8_SRGB = 54, |
| 73 | ASTC_2D_5X4_SRGB = 55, | 73 | ASTC_2D_8X5_SRGB = 55, |
| 74 | ASTC_2D_5X5 = 56, | 74 | ASTC_2D_5X4_SRGB = 56, |
| 75 | ASTC_2D_5X5_SRGB = 57, | 75 | ASTC_2D_5X5 = 57, |
| 76 | ASTC_2D_10X8 = 58, | 76 | ASTC_2D_5X5_SRGB = 58, |
| 77 | ASTC_2D_10X8_SRGB = 59, | 77 | ASTC_2D_10X8 = 59, |
| 78 | ASTC_2D_10X8_SRGB = 60, | ||
| 79 | ASTC_2D_6X6 = 61, | ||
| 80 | ASTC_2D_6X6_SRGB = 62, | ||
| 81 | ASTC_2D_10X10 = 63, | ||
| 82 | ASTC_2D_10X10_SRGB = 64, | ||
| 83 | ASTC_2D_12X12 = 65, | ||
| 84 | ASTC_2D_12X12_SRGB = 66, | ||
| 85 | ASTC_2D_8X6 = 67, | ||
| 86 | ASTC_2D_8X6_SRGB = 68, | ||
| 87 | ASTC_2D_6X5 = 69, | ||
| 88 | ASTC_2D_6X5_SRGB = 70, | ||
| 78 | 89 | ||
| 79 | MaxColorFormat, | 90 | MaxColorFormat, |
| 80 | 91 | ||
| 81 | // Depth formats | 92 | // Depth formats |
| 82 | Z32F = 60, | 93 | Z32F = 71, |
| 83 | Z16 = 61, | 94 | Z16 = 72, |
| 84 | 95 | ||
| 85 | MaxDepthFormat, | 96 | MaxDepthFormat, |
| 86 | 97 | ||
| 87 | // DepthStencil formats | 98 | // DepthStencil formats |
| 88 | Z24S8 = 62, | 99 | Z24S8 = 73, |
| 89 | S8Z24 = 63, | 100 | S8Z24 = 74, |
| 90 | Z32FS8 = 64, | 101 | Z32FS8 = 75, |
| 91 | 102 | ||
| 92 | MaxDepthStencilFormat, | 103 | MaxDepthStencilFormat, |
| 93 | 104 | ||
| @@ -177,6 +188,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ | |||
| 177 | 2, // DXT23_SRGB | 188 | 2, // DXT23_SRGB |
| 178 | 2, // DXT45_SRGB | 189 | 2, // DXT45_SRGB |
| 179 | 2, // BC7U_SRGB | 190 | 2, // BC7U_SRGB |
| 191 | 0, // R4G4B4A4U | ||
| 180 | 2, // ASTC_2D_4X4_SRGB | 192 | 2, // ASTC_2D_4X4_SRGB |
| 181 | 2, // ASTC_2D_8X8_SRGB | 193 | 2, // ASTC_2D_8X8_SRGB |
| 182 | 2, // ASTC_2D_8X5_SRGB | 194 | 2, // ASTC_2D_8X5_SRGB |
| @@ -185,6 +197,16 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ | |||
| 185 | 2, // ASTC_2D_5X5_SRGB | 197 | 2, // ASTC_2D_5X5_SRGB |
| 186 | 2, // ASTC_2D_10X8 | 198 | 2, // ASTC_2D_10X8 |
| 187 | 2, // ASTC_2D_10X8_SRGB | 199 | 2, // ASTC_2D_10X8_SRGB |
| 200 | 2, // ASTC_2D_6X6 | ||
| 201 | 2, // ASTC_2D_6X6_SRGB | ||
| 202 | 2, // ASTC_2D_10X10 | ||
| 203 | 2, // ASTC_2D_10X10_SRGB | ||
| 204 | 2, // ASTC_2D_12X12 | ||
| 205 | 2, // ASTC_2D_12X12_SRGB | ||
| 206 | 2, // ASTC_2D_8X6 | ||
| 207 | 2, // ASTC_2D_8X6_SRGB | ||
| 208 | 2, // ASTC_2D_6X5 | ||
| 209 | 2, // ASTC_2D_6X5_SRGB | ||
| 188 | 0, // Z32F | 210 | 0, // Z32F |
| 189 | 0, // Z16 | 211 | 0, // Z16 |
| 190 | 0, // Z24S8 | 212 | 0, // Z24S8 |
| @@ -261,6 +283,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | |||
| 261 | 4, // DXT23_SRGB | 283 | 4, // DXT23_SRGB |
| 262 | 4, // DXT45_SRGB | 284 | 4, // DXT45_SRGB |
| 263 | 4, // BC7U_SRGB | 285 | 4, // BC7U_SRGB |
| 286 | 1, // R4G4B4A4U | ||
| 264 | 4, // ASTC_2D_4X4_SRGB | 287 | 4, // ASTC_2D_4X4_SRGB |
| 265 | 8, // ASTC_2D_8X8_SRGB | 288 | 8, // ASTC_2D_8X8_SRGB |
| 266 | 8, // ASTC_2D_8X5_SRGB | 289 | 8, // ASTC_2D_8X5_SRGB |
| @@ -269,6 +292,16 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ | |||
| 269 | 5, // ASTC_2D_5X5_SRGB | 292 | 5, // ASTC_2D_5X5_SRGB |
| 270 | 10, // ASTC_2D_10X8 | 293 | 10, // ASTC_2D_10X8 |
| 271 | 10, // ASTC_2D_10X8_SRGB | 294 | 10, // ASTC_2D_10X8_SRGB |
| 295 | 6, // ASTC_2D_6X6 | ||
| 296 | 6, // ASTC_2D_6X6_SRGB | ||
| 297 | 10, // ASTC_2D_10X10 | ||
| 298 | 10, // ASTC_2D_10X10_SRGB | ||
| 299 | 12, // ASTC_2D_12X12 | ||
| 300 | 12, // ASTC_2D_12X12_SRGB | ||
| 301 | 8, // ASTC_2D_8X6 | ||
| 302 | 8, // ASTC_2D_8X6_SRGB | ||
| 303 | 6, // ASTC_2D_6X5 | ||
| 304 | 6, // ASTC_2D_6X5_SRGB | ||
| 272 | 1, // Z32F | 305 | 1, // Z32F |
| 273 | 1, // Z16 | 306 | 1, // Z16 |
| 274 | 1, // Z24S8 | 307 | 1, // Z24S8 |
| @@ -285,71 +318,82 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) { | |||
| 285 | } | 318 | } |
| 286 | 319 | ||
| 287 | constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ | 320 | constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ |
| 288 | 1, // ABGR8U | 321 | 1, // ABGR8U |
| 289 | 1, // ABGR8S | 322 | 1, // ABGR8S |
| 290 | 1, // ABGR8UI | 323 | 1, // ABGR8UI |
| 291 | 1, // B5G6R5U | 324 | 1, // B5G6R5U |
| 292 | 1, // A2B10G10R10U | 325 | 1, // A2B10G10R10U |
| 293 | 1, // A1B5G5R5U | 326 | 1, // A1B5G5R5U |
| 294 | 1, // R8U | 327 | 1, // R8U |
| 295 | 1, // R8UI | 328 | 1, // R8UI |
| 296 | 1, // RGBA16F | 329 | 1, // RGBA16F |
| 297 | 1, // RGBA16U | 330 | 1, // RGBA16U |
| 298 | 1, // RGBA16UI | 331 | 1, // RGBA16UI |
| 299 | 1, // R11FG11FB10F | 332 | 1, // R11FG11FB10F |
| 300 | 1, // RGBA32UI | 333 | 1, // RGBA32UI |
| 301 | 4, // DXT1 | 334 | 4, // DXT1 |
| 302 | 4, // DXT23 | 335 | 4, // DXT23 |
| 303 | 4, // DXT45 | 336 | 4, // DXT45 |
| 304 | 4, // DXN1 | 337 | 4, // DXN1 |
| 305 | 4, // DXN2UNORM | 338 | 4, // DXN2UNORM |
| 306 | 4, // DXN2SNORM | 339 | 4, // DXN2SNORM |
| 307 | 4, // BC7U | 340 | 4, // BC7U |
| 308 | 4, // BC6H_UF16 | 341 | 4, // BC6H_UF16 |
| 309 | 4, // BC6H_SF16 | 342 | 4, // BC6H_SF16 |
| 310 | 4, // ASTC_2D_4X4 | 343 | 4, // ASTC_2D_4X4 |
| 311 | 1, // BGRA8 | 344 | 1, // BGRA8 |
| 312 | 1, // RGBA32F | 345 | 1, // RGBA32F |
| 313 | 1, // RG32F | 346 | 1, // RG32F |
| 314 | 1, // R32F | 347 | 1, // R32F |
| 315 | 1, // R16F | 348 | 1, // R16F |
| 316 | 1, // R16U | 349 | 1, // R16U |
| 317 | 1, // R16S | 350 | 1, // R16S |
| 318 | 1, // R16UI | 351 | 1, // R16UI |
| 319 | 1, // R16I | 352 | 1, // R16I |
| 320 | 1, // RG16 | 353 | 1, // RG16 |
| 321 | 1, // RG16F | 354 | 1, // RG16F |
| 322 | 1, // RG16UI | 355 | 1, // RG16UI |
| 323 | 1, // RG16I | 356 | 1, // RG16I |
| 324 | 1, // RG16S | 357 | 1, // RG16S |
| 325 | 1, // RGB32F | 358 | 1, // RGB32F |
| 326 | 1, // RGBA8_SRGB | 359 | 1, // RGBA8_SRGB |
| 327 | 1, // RG8U | 360 | 1, // RG8U |
| 328 | 1, // RG8S | 361 | 1, // RG8S |
| 329 | 1, // RG32UI | 362 | 1, // RG32UI |
| 330 | 1, // RGBX16F | 363 | 1, // RGBX16F |
| 331 | 1, // R32UI | 364 | 1, // R32UI |
| 332 | 8, // ASTC_2D_8X8 | 365 | 8, // ASTC_2D_8X8 |
| 333 | 5, // ASTC_2D_8X5 | 366 | 5, // ASTC_2D_8X5 |
| 334 | 4, // ASTC_2D_5X4 | 367 | 4, // ASTC_2D_5X4 |
| 335 | 1, // BGRA8_SRGB | 368 | 1, // BGRA8_SRGB |
| 336 | 4, // DXT1_SRGB | 369 | 4, // DXT1_SRGB |
| 337 | 4, // DXT23_SRGB | 370 | 4, // DXT23_SRGB |
| 338 | 4, // DXT45_SRGB | 371 | 4, // DXT45_SRGB |
| 339 | 4, // BC7U_SRGB | 372 | 4, // BC7U_SRGB |
| 340 | 4, // ASTC_2D_4X4_SRGB | 373 | 1, // R4G4B4A4U |
| 341 | 8, // ASTC_2D_8X8_SRGB | 374 | 4, // ASTC_2D_4X4_SRGB |
| 342 | 5, // ASTC_2D_8X5_SRGB | 375 | 8, // ASTC_2D_8X8_SRGB |
| 343 | 4, // ASTC_2D_5X4_SRGB | 376 | 5, // ASTC_2D_8X5_SRGB |
| 344 | 5, // ASTC_2D_5X5 | 377 | 4, // ASTC_2D_5X4_SRGB |
| 345 | 5, // ASTC_2D_5X5_SRGB | 378 | 5, // ASTC_2D_5X5 |
| 346 | 8, // ASTC_2D_10X8 | 379 | 5, // ASTC_2D_5X5_SRGB |
| 347 | 8, // ASTC_2D_10X8_SRGB | 380 | 8, // ASTC_2D_10X8 |
| 348 | 1, // Z32F | 381 | 8, // ASTC_2D_10X8_SRGB |
| 349 | 1, // Z16 | 382 | 6, // ASTC_2D_6X6 |
| 350 | 1, // Z24S8 | 383 | 6, // ASTC_2D_6X6_SRGB |
| 351 | 1, // S8Z24 | 384 | 10, // ASTC_2D_10X10 |
| 352 | 1, // Z32FS8 | 385 | 10, // ASTC_2D_10X10_SRGB |
| 386 | 12, // ASTC_2D_12X12 | ||
| 387 | 12, // ASTC_2D_12X12_SRGB | ||
| 388 | 6, // ASTC_2D_8X6 | ||
| 389 | 6, // ASTC_2D_8X6_SRGB | ||
| 390 | 5, // ASTC_2D_6X5 | ||
| 391 | 5, // ASTC_2D_6X5_SRGB | ||
| 392 | 1, // Z32F | ||
| 393 | 1, // Z16 | ||
| 394 | 1, // Z24S8 | ||
| 395 | 1, // S8Z24 | ||
| 396 | 1, // Z32FS8 | ||
| 353 | }}; | 397 | }}; |
| 354 | 398 | ||
| 355 | static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { | 399 | static constexpr u32 GetDefaultBlockHeight(PixelFormat format) { |
| @@ -413,6 +457,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | |||
| 413 | 128, // DXT23_SRGB | 457 | 128, // DXT23_SRGB |
| 414 | 128, // DXT45_SRGB | 458 | 128, // DXT45_SRGB |
| 415 | 128, // BC7U | 459 | 128, // BC7U |
| 460 | 16, // R4G4B4A4U | ||
| 416 | 128, // ASTC_2D_4X4_SRGB | 461 | 128, // ASTC_2D_4X4_SRGB |
| 417 | 128, // ASTC_2D_8X8_SRGB | 462 | 128, // ASTC_2D_8X8_SRGB |
| 418 | 128, // ASTC_2D_8X5_SRGB | 463 | 128, // ASTC_2D_8X5_SRGB |
| @@ -421,6 +466,16 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ | |||
| 421 | 128, // ASTC_2D_5X5_SRGB | 466 | 128, // ASTC_2D_5X5_SRGB |
| 422 | 128, // ASTC_2D_10X8 | 467 | 128, // ASTC_2D_10X8 |
| 423 | 128, // ASTC_2D_10X8_SRGB | 468 | 128, // ASTC_2D_10X8_SRGB |
| 469 | 128, // ASTC_2D_6X6 | ||
| 470 | 128, // ASTC_2D_6X6_SRGB | ||
| 471 | 128, // ASTC_2D_10X10 | ||
| 472 | 128, // ASTC_2D_10X10_SRGB | ||
| 473 | 128, // ASTC_2D_12X12 | ||
| 474 | 128, // ASTC_2D_12X12_SRGB | ||
| 475 | 128, // ASTC_2D_8X6 | ||
| 476 | 128, // ASTC_2D_8X6_SRGB | ||
| 477 | 128, // ASTC_2D_6X5 | ||
| 478 | 128, // ASTC_2D_6X5_SRGB | ||
| 424 | 32, // Z32F | 479 | 32, // Z32F |
| 425 | 16, // Z16 | 480 | 16, // Z16 |
| 426 | 32, // Z24S8 | 481 | 32, // Z24S8 |
| @@ -504,6 +559,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table | |||
| 504 | SurfaceCompression::Compressed, // DXT23_SRGB | 559 | SurfaceCompression::Compressed, // DXT23_SRGB |
| 505 | SurfaceCompression::Compressed, // DXT45_SRGB | 560 | SurfaceCompression::Compressed, // DXT45_SRGB |
| 506 | SurfaceCompression::Compressed, // BC7U_SRGB | 561 | SurfaceCompression::Compressed, // BC7U_SRGB |
| 562 | SurfaceCompression::None, // R4G4B4A4U | ||
| 507 | SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB | 563 | SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB |
| 508 | SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB | 564 | SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB |
| 509 | SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB | 565 | SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB |
| @@ -512,6 +568,16 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table | |||
| 512 | SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB | 568 | SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB |
| 513 | SurfaceCompression::Converted, // ASTC_2D_10X8 | 569 | SurfaceCompression::Converted, // ASTC_2D_10X8 |
| 514 | SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB | 570 | SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB |
| 571 | SurfaceCompression::Converted, // ASTC_2D_6X6 | ||
| 572 | SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB | ||
| 573 | SurfaceCompression::Converted, // ASTC_2D_10X10 | ||
| 574 | SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB | ||
| 575 | SurfaceCompression::Converted, // ASTC_2D_12X12 | ||
| 576 | SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB | ||
| 577 | SurfaceCompression::Converted, // ASTC_2D_8X6 | ||
| 578 | SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB | ||
| 579 | SurfaceCompression::Converted, // ASTC_2D_6X5 | ||
| 580 | SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB | ||
| 515 | SurfaceCompression::None, // Z32F | 581 | SurfaceCompression::None, // Z32F |
| 516 | SurfaceCompression::None, // Z16 | 582 | SurfaceCompression::None, // Z16 |
| 517 | SurfaceCompression::None, // Z24S8 | 583 | SurfaceCompression::None, // Z24S8 |
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 683c49207..829268b4c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/algorithm.h" | ||
| 5 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 6 | #include "common/common_types.h" | 7 | #include "common/common_types.h" |
| 7 | #include "common/microprofile.h" | 8 | #include "common/microprofile.h" |
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5e497e49f..1bed82898 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -4,12 +4,11 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <optional> |
| 8 | #include <tuple> | ||
| 8 | #include <unordered_map> | 9 | #include <unordered_map> |
| 9 | #include <vector> | 10 | #include <vector> |
| 10 | 11 | ||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/binary_find.h" | ||
| 13 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 14 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| 15 | #include "video_core/morton.h" | 14 | #include "video_core/morton.h" |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ca2da8f97..6a92b22d3 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -62,10 +62,10 @@ public: | |||
| 62 | } | 62 | } |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | /*** | 65 | /** |
| 66 | * `Guard` guarantees that rendertargets don't unregister themselves if the | 66 | * Guarantees that rendertargets don't unregister themselves if the |
| 67 | * collide. Protection is currently only done on 3D slices. | 67 | * collide. Protection is currently only done on 3D slices. |
| 68 | ***/ | 68 | */ |
| 69 | void GuardRenderTargets(bool new_guard) { | 69 | void GuardRenderTargets(bool new_guard) { |
| 70 | guard_render_targets = new_guard; | 70 | guard_render_targets = new_guard; |
| 71 | } | 71 | } |
| @@ -287,7 +287,7 @@ protected: | |||
| 287 | const Tegra::Engines::Fermi2D::Config& copy_config) = 0; | 287 | const Tegra::Engines::Fermi2D::Config& copy_config) = 0; |
| 288 | 288 | ||
| 289 | // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture | 289 | // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture |
| 290 | // and reading it from a sepparate buffer. | 290 | // and reading it from a separate buffer. |
| 291 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | 291 | virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; |
| 292 | 292 | ||
| 293 | void ManageRenderTargetUnregister(TSurface& surface) { | 293 | void ManageRenderTargetUnregister(TSurface& surface) { |
| @@ -386,12 +386,13 @@ private: | |||
| 386 | }; | 386 | }; |
| 387 | 387 | ||
| 388 | /** | 388 | /** |
| 389 | * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. | 389 | * Takes care of selecting a proper strategy to deal with a texture recycle. |
| 390 | * @param overlaps, the overlapping surfaces registered in the cache. | 390 | * |
| 391 | * @param params, the paremeters on the new surface. | 391 | * @param overlaps The overlapping surfaces registered in the cache. |
| 392 | * @param gpu_addr, the starting address of the new surface. | 392 | * @param params The parameters on the new surface. |
| 393 | * @param untopological, tells the recycler that the texture has no way to match the overlaps | 393 | * @param gpu_addr The starting address of the new surface. |
| 394 | * due to topological reasons. | 394 | * @param untopological Indicates to the recycler that the texture has no way |
| 395 | * to match the overlaps due to topological reasons. | ||
| 395 | **/ | 396 | **/ |
| 396 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | 397 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, |
| 397 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | 398 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { |
| @@ -402,7 +403,7 @@ private: | |||
| 402 | if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { | 403 | if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { |
| 403 | return RecycleStrategy::Flush; | 404 | return RecycleStrategy::Flush; |
| 404 | } | 405 | } |
| 405 | for (auto s : overlaps) { | 406 | for (const auto& s : overlaps) { |
| 406 | const auto& s_params = s->GetSurfaceParams(); | 407 | const auto& s_params = s->GetSurfaceParams(); |
| 407 | if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { | 408 | if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { |
| 408 | return RecycleStrategy::Flush; | 409 | return RecycleStrategy::Flush; |
| @@ -419,16 +420,19 @@ private: | |||
| 419 | } | 420 | } |
| 420 | 421 | ||
| 421 | /** | 422 | /** |
| 422 | * `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in | 423 | * Used to decide what to do with textures we can't resolve in the cache It has 2 implemented |
| 423 | *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the | 424 | * strategies: Ignore and Flush. |
| 424 | *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the | 425 | * |
| 425 | *new surface from that data. | 426 | * - Ignore: Just unregisters all the overlaps and loads the new texture. |
| 426 | * @param overlaps, the overlapping surfaces registered in the cache. | 427 | * - Flush: Flushes all the overlaps into memory and loads the new surface from that data. |
| 427 | * @param params, the paremeters on the new surface. | 428 | * |
| 428 | * @param gpu_addr, the starting address of the new surface. | 429 | * @param overlaps The overlapping surfaces registered in the cache. |
| 429 | * @param preserve_contents, tells if the new surface should be loaded from meory or left blank | 430 | * @param params The parameters for the new surface. |
| 430 | * @param untopological, tells the recycler that the texture has no way to match the overlaps | 431 | * @param gpu_addr The starting address of the new surface. |
| 431 | * due to topological reasons. | 432 | * @param preserve_contents Indicates that the new surface should be loaded from memory or left |
| 433 | * blank. | ||
| 434 | * @param untopological Indicates to the recycler that the texture has no way to match the | ||
| 435 | * overlaps due to topological reasons. | ||
| 432 | **/ | 436 | **/ |
| 433 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, | 437 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, |
| 434 | const SurfaceParams& params, const GPUVAddr gpu_addr, | 438 | const SurfaceParams& params, const GPUVAddr gpu_addr, |
| @@ -465,10 +469,12 @@ private: | |||
| 465 | } | 469 | } |
| 466 | 470 | ||
| 467 | /** | 471 | /** |
| 468 | * `RebuildSurface` this method takes a single surface and recreates into another that | 472 | * Takes a single surface and recreates it into another that may differ in |
| 469 | * may differ in format, target or width alingment. | 473 | * format, target or width alignment. |
| 470 | * @param current_surface, the registered surface in the cache which we want to convert. | 474 | * |
| 471 | * @param params, the new surface params which we'll use to recreate the surface. | 475 | * @param current_surface The registered surface in the cache which we want to convert. |
| 476 | * @param params The new surface params which we'll use to recreate the surface. | ||
| 477 | * @param is_render Whether or not the surface is a render target. | ||
| 472 | **/ | 478 | **/ |
| 473 | std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, | 479 | std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params, |
| 474 | bool is_render) { | 480 | bool is_render) { |
| @@ -502,12 +508,14 @@ private: | |||
| 502 | } | 508 | } |
| 503 | 509 | ||
| 504 | /** | 510 | /** |
| 505 | * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's | 511 | * Takes a single surface and checks with the new surface's params if it's an exact |
| 506 | * params if it's an exact match, we return the main view of the registered surface. If it's | 512 | * match, we return the main view of the registered surface. If its formats don't |
| 507 | * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats | 513 | * match, we rebuild the surface. We call this last method a `Mirage`. If formats |
| 508 | * match but the targets don't, we create an overview View of the registered surface. | 514 | * match but the targets don't, we create an overview View of the registered surface. |
| 509 | * @param current_surface, the registered surface in the cache which we want to convert. | 515 | * |
| 510 | * @param params, the new surface params which we want to check. | 516 | * @param current_surface The registered surface in the cache which we want to convert. |
| 517 | * @param params The new surface params which we want to check. | ||
| 518 | * @param is_render Whether or not the surface is a render target. | ||
| 511 | **/ | 519 | **/ |
| 512 | std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, | 520 | std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface, |
| 513 | const SurfaceParams& params, bool is_render) { | 521 | const SurfaceParams& params, bool is_render) { |
| @@ -529,13 +537,14 @@ private: | |||
| 529 | } | 537 | } |
| 530 | 538 | ||
| 531 | /** | 539 | /** |
| 532 | * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface | 540 | * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate |
| 533 | * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps | 541 | * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps |
| 534 | * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface | 542 | * of the new surface, if they all match we end up recreating a surface for them, |
| 535 | * for them, else we return nothing. | 543 | * else we return nothing. |
| 536 | * @param overlaps, the overlapping surfaces registered in the cache. | 544 | * |
| 537 | * @param params, the paremeters on the new surface. | 545 | * @param overlaps The overlapping surfaces registered in the cache. |
| 538 | * @param gpu_addr, the starting address of the new surface. | 546 | * @param params The parameters on the new surface. |
| 547 | * @param gpu_addr The starting address of the new surface. | ||
| 539 | **/ | 548 | **/ |
| 540 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, | 549 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, |
| 541 | const SurfaceParams& params, | 550 | const SurfaceParams& params, |
| @@ -575,7 +584,7 @@ private: | |||
| 575 | } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { | 584 | } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { |
| 576 | return {}; | 585 | return {}; |
| 577 | } | 586 | } |
| 578 | for (auto surface : overlaps) { | 587 | for (const auto& surface : overlaps) { |
| 579 | Unregister(surface); | 588 | Unregister(surface); |
| 580 | } | 589 | } |
| 581 | new_surface->MarkAsModified(modified, Tick()); | 590 | new_surface->MarkAsModified(modified, Tick()); |
| @@ -584,19 +593,27 @@ private: | |||
| 584 | } | 593 | } |
| 585 | 594 | ||
| 586 | /** | 595 | /** |
| 587 | * `GetSurface` gets the starting address and parameters of a candidate surface and tries | 596 | * Gets the starting address and parameters of a candidate surface and tries |
| 588 | * to find a matching surface within the cache. This is done in 3 big steps. The first is to | 597 | * to find a matching surface within the cache. This is done in 3 big steps: |
| 589 | * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. | 598 | * |
| 590 | * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from | 599 | * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2. |
| 591 | * memory else we move to step 3. Step 3 consists on figuring the relationship between the | 600 | * |
| 592 | * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many | 601 | * 2. Check if there are any overlaps at all, if there are none, we just load the texture from |
| 593 | * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the | 602 | * memory else we move to step 3. |
| 594 | * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to | 603 | * |
| 595 | * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface | 604 | * 3. Consists of figuring out the relationship between the candidate texture and the |
| 596 | * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface. | 605 | * overlaps. We divide the scenarios depending if there's 1 or many overlaps. If |
| 597 | * @param gpu_addr, the starting address of the candidate surface. | 606 | * there's many, we just try to reconstruct a new surface out of them based on the |
| 598 | * @param params, the paremeters on the candidate surface. | 607 | * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we |
| 599 | * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. | 608 | * have to check if the candidate is a view (layer/mipmap) of the overlap or if the |
| 609 | * registered surface is a mipmap/layer of the candidate. In this last case we reconstruct | ||
| 610 | * a new surface. | ||
| 611 | * | ||
| 612 | * @param gpu_addr The starting address of the candidate surface. | ||
| 613 | * @param params The parameters on the candidate surface. | ||
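| 599 | * @param preserve_contents, tells if the new surface should be loaded from meory or left blank. | 614 | * @param preserve_contents Indicates whether the new surface should be loaded from memory or |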
| 615 | * left blank. | ||
| 616 | * @param is_render Whether or not the surface is a render target. | ||
| 600 | **/ | 617 | **/ |
| 601 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, | 618 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, |
| 602 | bool preserve_contents, bool is_render) { | 619 | bool preserve_contents, bool is_render) { |
| @@ -651,7 +668,7 @@ private: | |||
| 651 | // Step 3 | 668 | // Step 3 |
| 652 | // Now we need to figure the relationship between the texture and its overlaps | 669 | // Now we need to figure the relationship between the texture and its overlaps |
| 653 | // we do a topological test to ensure we can find some relationship. If it fails | 670 | // we do a topological test to ensure we can find some relationship. If it fails |
| 654 | // inmediatly recycle the texture | 671 | // immediately recycle the texture |
| 655 | for (const auto& surface : overlaps) { | 672 | for (const auto& surface : overlaps) { |
| 656 | const auto topological_result = surface->MatchesTopology(params); | 673 | const auto topological_result = surface->MatchesTopology(params); |
| 657 | if (topological_result != MatchTopologyResult::FullMatch) { | 674 | if (topological_result != MatchTopologyResult::FullMatch) { |
| @@ -720,12 +737,13 @@ private: | |||
| 720 | } | 737 | } |
| 721 | 738 | ||
| 722 | /** | 739 | /** |
| 723 | * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries | 740 | * Gets the starting address and parameters of a candidate surface and tries to find a |
| 724 | * to find a matching surface within the cache that's similar to it. If there are many textures | 741 | * matching surface within the cache that's similar to it. If there are many textures |
| 725 | * or the texture found is entirely incompatible, it will fail. If no texture is found, the | 742 | * or the texture found is entirely incompatible, it will fail. If no texture is found, the |
| 726 | * blit will be unsuccessful. | 743 | * blit will be unsuccessful. |
| 727 | * @param gpu_addr, the starting address of the candidate surface. | 744 | * |
| 728 | * @param params, the paremeters on the candidate surface. | 745 | * @param gpu_addr The starting address of the candidate surface. |
| 746 | * @param params The parameters on the candidate surface. | ||
| 729 | **/ | 747 | **/ |
| 730 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | 748 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { |
| 731 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 749 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; |
| @@ -777,11 +795,14 @@ private: | |||
| 777 | } | 795 | } |
| 778 | 796 | ||
| 779 | /** | 797 | /** |
| 780 | * `DeduceBestBlit` gets the a source and destination starting address and parameters, | 798 | * Gets a source and destination starting address and parameters, |
| 781 | * and tries to deduce if they are supposed to be depth textures. If so, their | 799 | * and tries to deduce if they are supposed to be depth textures. If so, their |
| 782 | * parameters are modified and fixed into so. | 800 | * parameters are modified and fixed into so. |
| 783 | * @param gpu_addr, the starting address of the candidate surface. | 801 | * |
| 784 | * @param params, the parameters on the candidate surface. | 802 | * @param src_params The parameters of the candidate surface. |
| 803 | * @param dst_params The parameters of the destination surface. | ||
| 804 | * @param src_gpu_addr The starting address of the candidate surface. | ||
| 805 | * @param dst_gpu_addr The starting address of the destination surface. | ||
| 785 | **/ | 806 | **/ |
| 786 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, | 807 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, |
| 787 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { | 808 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index a9b8f69af..58b608a36 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -422,7 +422,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { | |||
| 422 | TexelWeightParams params; | 422 | TexelWeightParams params; |
| 423 | 423 | ||
| 424 | // Read the entire block mode all at once | 424 | // Read the entire block mode all at once |
| 425 | uint16_t modeBits = strm.ReadBits(11); | 425 | uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); |
| 426 | 426 | ||
| 427 | // Does this match the void extent block mode? | 427 | // Does this match the void extent block mode? |
| 428 | if ((modeBits & 0x01FF) == 0x1FC) { | 428 | if ((modeBits & 0x01FF) == 0x1FC) { |
| @@ -625,10 +625,10 @@ static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint | |||
| 625 | } | 625 | } |
| 626 | 626 | ||
| 627 | // Decode the RGBA components and renormalize them to the range [0, 255] | 627 | // Decode the RGBA components and renormalize them to the range [0, 255] |
| 628 | uint16_t r = strm.ReadBits(16); | 628 | uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); |
| 629 | uint16_t g = strm.ReadBits(16); | 629 | uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); |
| 630 | uint16_t b = strm.ReadBits(16); | 630 | uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); |
| 631 | uint16_t a = strm.ReadBits(16); | 631 | uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); |
| 632 | 632 | ||
| 633 | uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | | 633 | uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | |
| 634 | (static_cast<uint32_t>(a) & 0xFF00) << 16; | 634 | (static_cast<uint32_t>(a) & 0xFF00) << 16; |
| @@ -681,9 +681,10 @@ protected: | |||
| 681 | 681 | ||
| 682 | public: | 682 | public: |
| 683 | Pixel() = default; | 683 | Pixel() = default; |
| 684 | Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8) | 684 | Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) |
| 685 | : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, | 685 | : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, |
| 686 | color{a, r, g, b} {} | 686 | color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), |
| 687 | static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} | ||
| 687 | 688 | ||
| 688 | // Changes the depth of each pixel. This scales the values to | 689 | // Changes the depth of each pixel. This scales the values to |
| 689 | // the appropriate bit depth by either truncating the least | 690 | // the appropriate bit depth by either truncating the least |