121 files changed, 2844 insertions, 1386 deletions
diff --git a/.travis.yml b/.travis.yml
index b0fbe3c5f..9512f7843 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ matrix:
     - os: osx
       env: NAME="macos build"
       sudo: false
-      osx_image: xcode10
+      osx_image: xcode10.1
       install: "./.travis/macos/deps.sh"
       script: "./.travis/macos/build.sh"
       after_success: "./.travis/macos/upload.sh"
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index 4a14837fc..b7b4c6f8c 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -2,7 +2,7 @@

 set -o pipefail

-export MACOSX_DEPLOYMENT_TARGET=10.13
+export MACOSX_DEPLOYMENT_TARGET=10.14
 export Qt5_DIR=$(brew --prefix)/opt/qt5
 export UNICORNDIR=$(pwd)/externals/unicorn
 export PATH="/usr/local/opt/ccache/libexec:$PATH"
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 78728e08b..08315a1f1 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -73,6 +73,7 @@ set(HASH_FILES
     "${VIDEO_CORE}/shader/decode/integer_set.cpp"
     "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/texture.cpp"
     "${VIDEO_CORE}/shader/decode/other.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 1da0b9f2a..7047ed9cf 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -12,7 +12,7 @@
 #include "common/ring_buffer.h"
 #include "core/settings.h"

-#ifdef _MSC_VER
+#ifdef _WIN32
 #include <objbase.h>
 #endif

@@ -113,7 +113,7 @@ private:

 CubebSink::CubebSink(std::string_view target_device_name) {
     // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows
-#ifdef _MSC_VER
+#ifdef _WIN32
     com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
 #endif

@@ -152,7 +152,7 @@ CubebSink::~CubebSink() {

     cubeb_destroy(ctx);

-#ifdef _MSC_VER
+#ifdef _WIN32
     if (SUCCEEDED(com_init_result)) {
         CoUninitialize();
     }
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h
index 511df7bb1..7ce850f47 100644
--- a/src/audio_core/cubeb_sink.h
+++ b/src/audio_core/cubeb_sink.h
@@ -26,7 +26,7 @@ private:
     cubeb_devid output_device{};
     std::vector<SinkStreamPtr> sink_streams;

-#ifdef _MSC_VER
+#ifdef _WIN32
     u32 com_init_result = 0;
 #endif
 };
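
Note: the cubeb changes above all swap the compiler check _MSC_VER for the platform check _WIN32, so MinGW builds of the Windows target also initialize COM before cubeb_init. A standalone sketch of that pattern (not yuzu code) for reference:

// Guard COM setup by the platform macro _WIN32, which every Windows toolchain
// defines, instead of _MSC_VER, which only MSVC defines.
#ifdef _WIN32
#include <objbase.h>
#endif

int main() {
#ifdef _WIN32
    const HRESULT com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
    // ... call a COM-dependent API (such as WASAPI through cubeb) here ...
    if (SUCCEEDED(com_init_result)) {
        CoUninitialize();
    }
#endif
    return 0;
}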
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bdd885273..3d30f0e3e 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
     "${VIDEO_CORE}/shader/decode/integer_set.cpp"
     "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/memory.cpp"
+    "${VIDEO_CORE}/shader/decode/texture.cpp"
     "${VIDEO_CORE}/shader/decode/other.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
     "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/common/bit_field.h b/src/common/bit_field.h
index 21e07925d..7433c39ba 100644
--- a/src/common/bit_field.h
+++ b/src/common/bit_field.h
@@ -111,12 +111,6 @@
 template <std::size_t Position, std::size_t Bits, typename T>
 struct BitField {
 private:
-    // We hide the copy assigment operator here, because the default copy
-    // assignment would copy the full storage value, rather than just the bits
-    // relevant to this particular bit field.
-    // We don't delete it because we want BitField to be trivially copyable.
-    constexpr BitField& operator=(const BitField&) = default;
-
     // UnderlyingType is T for non-enum types and the underlying type of T if
     // T is an enumeration. Note that T is wrapped within an enable_if in the
     // former case to workaround compile errors which arise when using
@@ -163,9 +157,13 @@ public:
     BitField(T val) = delete;
     BitField& operator=(T val) = delete;

-    // Force default constructor to be created
-    // so that we can use this within unions
-    constexpr BitField() = default;
+    constexpr BitField() noexcept = default;
+
+    constexpr BitField(const BitField&) noexcept = default;
+    constexpr BitField& operator=(const BitField&) noexcept = default;
+
+    constexpr BitField(BitField&&) noexcept = default;
+    constexpr BitField& operator=(BitField&&) noexcept = default;

     constexpr FORCE_INLINE operator T() const {
         return Value();
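
For context on the bit_field.h change: BitField is meant to be aliased inside unions, which is why it keeps defaulted (trivial) construction and copy operations. A minimal usage sketch, assuming yuzu's common/bit_field.h and common_types.h are on the include path; the ExampleControl layout is made up for illustration:

#include "common/bit_field.h"
#include "common/common_types.h"

union ExampleControl {
    u32 raw;
    BitField<0, 4, u32> mode;     // bits 0-3 (hypothetical field)
    BitField<4, 2, u32> priority; // bits 4-5 (hypothetical field)
};

int main() {
    ExampleControl a{};
    a.mode.Assign(0xF);
    a.priority.Assign(2);

    // The defaulted copy operations copy the whole underlying storage word, so
    // copies are normally taken at the union level rather than per field:
    ExampleControl b = a;
    return b.raw == a.raw ? 0 : 1;
}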
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index b369f199f..4462ff3fb 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -39,8 +39,10 @@ public:
     Impl(Impl const&) = delete;
     const Impl& operator=(Impl const&) = delete;

-    void PushEntry(Entry e) {
-        message_queue.Push(std::move(e));
+    void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num,
+                   const char* function, std::string message) {
+        message_queue.Push(
+            CreateEntry(log_class, log_level, filename, line_num, function, std::move(message)));
     }

     void AddBackend(std::unique_ptr<Backend> backend) {
@@ -108,11 +110,30 @@ private:
         backend_thread.join();
     }

+    Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
+                      const char* function, std::string message) const {
+        using std::chrono::duration_cast;
+        using std::chrono::steady_clock;
+
+        Entry entry;
+        entry.timestamp =
+            duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
+        entry.log_class = log_class;
+        entry.log_level = log_level;
+        entry.filename = Common::TrimSourcePath(filename);
+        entry.line_num = line_nr;
+        entry.function = function;
+        entry.message = std::move(message);
+
+        return entry;
+    }
+
     std::mutex writing_mutex;
     std::thread backend_thread;
     std::vector<std::unique_ptr<Backend>> backends;
     Common::MPSCQueue<Log::Entry> message_queue;
     Filter filter;
+    std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
 };

 void ConsoleBackend::Write(const Entry& entry) {
@@ -271,25 +292,6 @@ const char* GetLevelName(Level log_level) {
 #undef LVL
 }

-Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
-                  const char* function, std::string message) {
-    using std::chrono::duration_cast;
-    using std::chrono::steady_clock;
-
-    static steady_clock::time_point time_origin = steady_clock::now();
-
-    Entry entry;
-    entry.timestamp = duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
-    entry.log_class = log_class;
-    entry.log_level = log_level;
-    entry.filename = Common::TrimSourcePath(filename);
-    entry.line_num = line_nr;
-    entry.function = function;
-    entry.message = std::move(message);
-
-    return entry;
-}
-
 void SetGlobalFilter(const Filter& filter) {
     Impl::Instance().SetGlobalFilter(filter);
 }
@@ -314,9 +316,7 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename,
     if (!filter.CheckMessage(log_class, log_level))
         return;

-    Entry entry =
-        CreateEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args));
-
-    instance.PushEntry(std::move(entry));
+    instance.PushEntry(log_class, log_level, filename, line_num, function,
+                       fmt::vformat(format, args));
 }
 } // namespace Log
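
The backend.cpp change moves CreateEntry into Impl so the timestamp origin becomes a per-instance member instead of a function-local static. A self-contained sketch (toy class, not yuzu's logger) of that timestamping scheme:

#include <chrono>
#include <cstdio>

class LoggerSketch {
public:
    // Microseconds elapsed since this logger instance was constructed.
    std::chrono::microseconds Stamp() const {
        using std::chrono::duration_cast;
        using std::chrono::steady_clock;
        return duration_cast<std::chrono::microseconds>(steady_clock::now() - time_origin);
    }

private:
    std::chrono::steady_clock::time_point time_origin{std::chrono::steady_clock::now()};
};

int main() {
    LoggerSketch logger;
    std::printf("%lld us since logger creation\n",
                static_cast<long long>(logger.Stamp().count()));
    return 0;
}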
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index a31ee6968..fca0267a1 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -135,10 +135,6 @@ const char* GetLogClassName(Class log_class);
  */
 const char* GetLevelName(Level log_level);

-/// Creates a log entry by formatting the given source location, and message.
-Entry CreateEntry(Class log_class, Level log_level, const char* filename, unsigned int line_nr,
-                  const char* function, std::string message);
-
 /**
  * The global filter will prevent any messages from even being processed if they are filtered. Each
  * backend can have a filter, but if the level is lower than the global filter, the backend will
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 988356c65..8ccb2d5f0 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -217,6 +217,7 @@ add_library(core STATIC
     hle/service/audio/audren_u.h
     hle/service/audio/codecctl.cpp
     hle/service/audio/codecctl.h
+    hle/service/audio/errors.h
     hle/service/audio/hwopus.cpp
     hle/service/audio/hwopus.h
     hle/service/bcat/bcat.cpp
diff --git a/src/core/core.cpp b/src/core/core.cpp
index ab7181a05..89b3fb418 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -36,7 +36,8 @@
 #include "frontend/applets/software_keyboard.h"
 #include "frontend/applets/web_browser.h"
 #include "video_core/debug_utils/debug_utils.h"
-#include "video_core/gpu.h"
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_synch.h"
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"

@@ -78,6 +79,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
     return vfs->OpenFile(path, FileSys::Mode::Read);
 }
 struct System::Impl {
+    explicit Impl(System& system) : kernel{system} {}

     Cpu& CurrentCpuCore() {
         return cpu_core_manager.GetCurrentCore();
@@ -95,7 +97,7 @@ struct System::Impl {
         LOG_DEBUG(HW_Memory, "initialized OK");

         core_timing.Initialize();
-        kernel.Initialize(core_timing);
+        kernel.Initialize();

         const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
             std::chrono::system_clock::now().time_since_epoch());
@@ -114,7 +116,7 @@ struct System::Impl {
         if (web_browser == nullptr)
             web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();

-        auto main_process = Kernel::Process::Create(kernel, "main");
+        auto main_process = Kernel::Process::Create(system, "main");
         kernel.MakeCurrentProcess(main_process.get());

         telemetry_session = std::make_unique<Core::TelemetrySession>();
@@ -128,10 +130,16 @@ struct System::Impl {
             return ResultStatus::ErrorVideoCore;
         }

-        gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer());
+        is_powered_on = true;
+
+        if (Settings::values.use_asynchronous_gpu_emulation) {
+            gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
+        } else {
+            gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
+        }

         cpu_core_manager.Initialize(system);
-        is_powered_on = true;
+
         LOG_DEBUG(Core, "Initialized OK");

         // Reset counters and set time origin to current frame
@@ -182,13 +190,13 @@ struct System::Impl {

     void Shutdown() {
         // Log last frame performance stats
-        auto perf_results = GetAndResetPerfStats();
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
-                             perf_results.emulation_speed * 100.0);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
-                             perf_results.game_fps);
-        Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
-                             perf_results.frametime * 1000.0);
+        const auto perf_results = GetAndResetPerfStats();
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed",
+                                    perf_results.emulation_speed * 100.0);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate",
+                                    perf_results.game_fps);
+        telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
+                                    perf_results.frametime * 1000.0);

         is_powered_on = false;

@@ -265,7 +273,7 @@ struct System::Impl {
     Core::FrameLimiter frame_limiter;
 };

-System::System() : impl{std::make_unique<Impl>()} {}
+System::System() : impl{std::make_unique<Impl>(*this)} {}
 System::~System() = default;

 Cpu& System::CurrentCpuCore() {
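
The core.cpp change selects between the new GPUAsynch and GPUSynch backends based on Settings::values.use_asynchronous_gpu_emulation. A self-contained sketch (toy classes, not yuzu's) of that dispatch pattern:

#include <cstdio>
#include <memory>

struct GPU {
    virtual ~GPU() = default;
    virtual void PushCommands() = 0;
};

struct GPUAsynch : GPU {
    void PushCommands() override {
        std::puts("queued for a dedicated GPU thread");
    }
};

struct GPUSynch : GPU {
    void PushCommands() override {
        std::puts("processed on the calling thread");
    }
};

int main() {
    const bool use_asynchronous_gpu_emulation = true; // stand-in for the user setting

    std::unique_ptr<GPU> gpu_core;
    if (use_asynchronous_gpu_emulation) {
        gpu_core = std::make_unique<GPUAsynch>();
    } else {
        gpu_core = std::make_unique<GPUSynch>();
    }
    gpu_core->PushCommands();
    return 0;
}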
diff --git a/src/core/core.h b/src/core/core.h
index d720013f7..ba76a41d8 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() {
     return System::GetInstance().CurrentArmInterface();
 }

-inline TelemetrySession& Telemetry() {
-    return System::GetInstance().TelemetrySession();
-}
-
 inline Kernel::Process* CurrentProcess() {
     return System::GetInstance().CurrentProcess();
 }
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 54aa21a3a..1eefed6d0 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -11,6 +11,7 @@
 #endif
 #include "core/arm/exclusive_monitor.h"
 #include "core/arm/unicorn/arm_unicorn.h"
+#include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/scheduler.h"
@@ -49,9 +50,9 @@ bool CpuBarrier::Rendezvous() {
     return false;
 }

-Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
-         CpuBarrier& cpu_barrier, std::size_t core_index)
-    : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} {
+Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
+         std::size_t core_index)
+    : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} {
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
         arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index);
@@ -63,7 +64,7 @@ Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
         arm_interface = std::make_unique<ARM_Unicorn>(core_timing);
     }

-    scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface);
+    scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface);
 }

 Cpu::~Cpu() = default;
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index e2204c6b0..7589beb8c 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -15,6 +15,10 @@ namespace Kernel {
 class Scheduler;
 }

+namespace Core {
+class System;
+}
+
 namespace Core::Timing {
 class CoreTiming;
 }
@@ -45,8 +49,8 @@ private:

 class Cpu {
 public:
-    Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor,
-        CpuBarrier& cpu_barrier, std::size_t core_index);
+    Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier,
+        std::size_t core_index);
     ~Cpu();

     void RunLoop(bool tight_loop = true);
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 2ddb3610d..93bc5619c 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -27,8 +27,7 @@ void CpuCoreManager::Initialize(System& system) {
     exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());

     for (std::size_t index = 0; index < cores.size(); ++index) {
-        cores[index] =
-            std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index);
+        cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
     }

     // Create threads for CPU cores 1-3, and build thread_to_cpu map
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h
index ed84197b3..455d1f346 100644
--- a/src/core/hle/ipc.h
+++ b/src/core/hle/ipc.h
@@ -4,10 +4,10 @@

 #pragma once

+#include "common/bit_field.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/swap.h"
-#include "core/hle/kernel/errors.h"
-#include "core/memory.h"

 namespace IPC {

diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index 90f276ee8..0d8368546 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -350,7 +350,7 @@ public:
     template <class T>
     std::shared_ptr<T> PopIpcInterface() {
         ASSERT(context->Session()->IsDomain());
-        ASSERT(context->GetDomainMessageHeader()->input_object_count > 0);
+        ASSERT(context->GetDomainMessageHeader().input_object_count > 0);
         return context->GetDomainRequestHandler<T>(Pop<u32>() - 1);
     }
 };
@@ -362,6 +362,11 @@ inline u32 RequestParser::Pop() {
     return cmdbuf[index++];
 }

+template <>
+inline s32 RequestParser::Pop() {
+    return static_cast<s32>(Pop<u32>());
+}
+
 template <typename T>
 void RequestParser::PopRaw(T& value) {
     std::memcpy(&value, cmdbuf + index, sizeof(T));
@@ -393,6 +398,16 @@ inline u64 RequestParser::Pop() {
 }

 template <>
+inline s8 RequestParser::Pop() {
+    return static_cast<s8>(Pop<u8>());
+}
+
+template <>
+inline s16 RequestParser::Pop() {
+    return static_cast<s16>(Pop<u16>());
+}
+
+template <>
 inline s64 RequestParser::Pop() {
     return static_cast<s64>(Pop<u64>());
 }
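
The new Pop&lt;s8&gt;/Pop&lt;s16&gt;/Pop&lt;s32&gt; specializations mirror the existing Pop&lt;s64&gt;: read the unsigned value from the command buffer and cast it to the signed type of the same width. A standalone sketch (not the yuzu header) of the idea:

#include <cstdint>
#include <cstdio>

// Read one raw 32-bit word from a command buffer cursor.
std::uint32_t PopU32(const std::uint32_t*& cmdbuf) {
    return *cmdbuf++;
}

// Same idea as "return static_cast<s32>(Pop<u32>());" in the diff.
std::int32_t PopS32(const std::uint32_t*& cmdbuf) {
    return static_cast<std::int32_t>(PopU32(cmdbuf));
}

int main() {
    const std::uint32_t words[] = {0xFFFFFFFFu};
    const std::uint32_t* cursor = words;
    std::printf("%d\n", PopS32(cursor)); // prints -1
    return 0;
}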
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
index a250d088d..352190da8 100644
--- a/src/core/hle/kernel/address_arbiter.cpp
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "core/core.h"
 #include "core/core_cpu.h"
+#include "core/hle/kernel/address_arbiter.h"
 #include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
@@ -17,58 +18,16 @@
 #include "core/hle/result.h"
 #include "core/memory.h"

-namespace Kernel::AddressArbiter {
-
-// Performs actual address waiting logic.
-static ResultCode WaitForAddress(VAddr address, s64 timeout) {
-    SharedPtr<Thread> current_thread = GetCurrentThread();
-    current_thread->SetArbiterWaitAddress(address);
-    current_thread->SetStatus(ThreadStatus::WaitArb);
-    current_thread->InvalidateWakeupCallback();
-
-    current_thread->WakeAfterDelay(timeout);
-
-    Core::System::GetInstance().CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
-    return RESULT_TIMEOUT;
-}
-
-// Gets the threads waiting on an address.
-static std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) {
-    const auto RetrieveWaitingThreads = [](std::size_t core_index,
-                                           std::vector<SharedPtr<Thread>>& waiting_threads,
-                                           VAddr arb_addr) {
-        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
-        const auto& thread_list = scheduler.GetThreadList();
-
-        for (const auto& thread : thread_list) {
-            if (thread->GetArbiterWaitAddress() == arb_addr)
-                waiting_threads.push_back(thread);
-        }
-    };
-
-    // Retrieve all threads that are waiting for this address.
-    std::vector<SharedPtr<Thread>> threads;
-    RetrieveWaitingThreads(0, threads, address);
-    RetrieveWaitingThreads(1, threads, address);
-    RetrieveWaitingThreads(2, threads, address);
-    RetrieveWaitingThreads(3, threads, address);
-
-    // Sort them by priority, such that the highest priority ones come first.
-    std::sort(threads.begin(), threads.end(),
-              [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
-                  return lhs->GetPriority() < rhs->GetPriority();
-              });
-
-    return threads;
-}
-
+namespace Kernel {
+namespace {
 // Wake up num_to_wake (or all) threads in a vector.
-static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
+void WakeThreads(const std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
     // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
     // them all.
     std::size_t last = waiting_threads.size();
-    if (num_to_wake > 0)
+    if (num_to_wake > 0) {
         last = num_to_wake;
+    }

     // Signal the waiting threads.
     for (std::size_t i = 0; i < last; i++) {
@@ -78,42 +37,55 @@ static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num
         waiting_threads[i]->ResumeFromWait();
     }
 }
+} // Anonymous namespace
+
+AddressArbiter::AddressArbiter(Core::System& system) : system{system} {}
+AddressArbiter::~AddressArbiter() = default;
+
+ResultCode AddressArbiter::SignalToAddress(VAddr address, SignalType type, s32 value,
+                                           s32 num_to_wake) {
+    switch (type) {
+    case SignalType::Signal:
+        return SignalToAddressOnly(address, num_to_wake);
+    case SignalType::IncrementAndSignalIfEqual:
+        return IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
+    case SignalType::ModifyByWaitingCountAndSignalIfEqual:
+        return ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, num_to_wake);
+    default:
+        return ERR_INVALID_ENUM_VALUE;
+    }
+}

-// Signals an address being waited on.
-ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
-    std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
-
+ResultCode AddressArbiter::SignalToAddressOnly(VAddr address, s32 num_to_wake) {
+    const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
     WakeThreads(waiting_threads, num_to_wake);
     return RESULT_SUCCESS;
 }

-// Signals an address being waited on and increments its value if equal to the value argument.
-ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
+ResultCode AddressArbiter::IncrementAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                              s32 num_to_wake) {
     // Ensure that we can write to the address.
     if (!Memory::IsValidVirtualAddress(address)) {
         return ERR_INVALID_ADDRESS_STATE;
     }

-    if (static_cast<s32>(Memory::Read32(address)) == value) {
-        Memory::Write32(address, static_cast<u32>(value + 1));
-    } else {
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
         return ERR_INVALID_STATE;
     }

-    return SignalToAddress(address, num_to_wake);
+    Memory::Write32(address, static_cast<u32>(value + 1));
+    return SignalToAddressOnly(address, num_to_wake);
 }

-// Signals an address being waited on and modifies its value based on waiting thread count if equal
-// to the value argument.
-ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
-                                                         s32 num_to_wake) {
+ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                                         s32 num_to_wake) {
     // Ensure that we can write to the address.
     if (!Memory::IsValidVirtualAddress(address)) {
         return ERR_INVALID_ADDRESS_STATE;
     }

     // Get threads waiting on the address.
-    std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);
+    const std::vector<SharedPtr<Thread>> waiting_threads = GetThreadsWaitingOnAddress(address);

     // Determine the modified value depending on the waiting count.
     s32 updated_value;
@@ -125,41 +97,54 @@ ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 valu
         updated_value = value;
     }

-    if (static_cast<s32>(Memory::Read32(address)) == value) {
-        Memory::Write32(address, static_cast<u32>(updated_value));
-    } else {
+    if (static_cast<s32>(Memory::Read32(address)) != value) {
         return ERR_INVALID_STATE;
     }

+    Memory::Write32(address, static_cast<u32>(updated_value));
     WakeThreads(waiting_threads, num_to_wake);
     return RESULT_SUCCESS;
 }

-// Waits on an address if the value passed is less than the argument value, optionally decrementing.
-ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
+ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
+                                          s64 timeout_ns) {
+    switch (type) {
+    case ArbitrationType::WaitIfLessThan:
+        return WaitForAddressIfLessThan(address, value, timeout_ns, false);
+    case ArbitrationType::DecrementAndWaitIfLessThan:
+        return WaitForAddressIfLessThan(address, value, timeout_ns, true);
+    case ArbitrationType::WaitIfEqual:
+        return WaitForAddressIfEqual(address, value, timeout_ns);
+    default:
+        return ERR_INVALID_ENUM_VALUE;
+    }
+}
+
+ResultCode AddressArbiter::WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
+                                                    bool should_decrement) {
     // Ensure that we can read the address.
     if (!Memory::IsValidVirtualAddress(address)) {
         return ERR_INVALID_ADDRESS_STATE;
     }

-    s32 cur_value = static_cast<s32>(Memory::Read32(address));
-    if (cur_value < value) {
-        if (should_decrement) {
-            Memory::Write32(address, static_cast<u32>(cur_value - 1));
-        }
-    } else {
+    const s32 cur_value = static_cast<s32>(Memory::Read32(address));
+    if (cur_value >= value) {
         return ERR_INVALID_STATE;
     }
+
+    if (should_decrement) {
+        Memory::Write32(address, static_cast<u32>(cur_value - 1));
+    }
+
     // Short-circuit without rescheduling, if timeout is zero.
     if (timeout == 0) {
         return RESULT_TIMEOUT;
     }

-    return WaitForAddress(address, timeout);
+    return WaitForAddressImpl(address, timeout);
 }

-// Waits on an address if the value passed is equal to the argument value.
-ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
+ResultCode AddressArbiter::WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
     // Ensure that we can read the address.
     if (!Memory::IsValidVirtualAddress(address)) {
         return ERR_INVALID_ADDRESS_STATE;
@@ -173,6 +158,48 @@ ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
         return RESULT_TIMEOUT;
     }

-    return WaitForAddress(address, timeout);
+    return WaitForAddressImpl(address, timeout);
+}
+
+ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) {
+    SharedPtr<Thread> current_thread = system.CurrentScheduler().GetCurrentThread();
+    current_thread->SetArbiterWaitAddress(address);
+    current_thread->SetStatus(ThreadStatus::WaitArb);
+    current_thread->InvalidateWakeupCallback();
+
+    current_thread->WakeAfterDelay(timeout);
+
+    system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule();
+    return RESULT_TIMEOUT;
+}
+
+std::vector<SharedPtr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const {
+    const auto RetrieveWaitingThreads = [this](std::size_t core_index,
+                                               std::vector<SharedPtr<Thread>>& waiting_threads,
+                                               VAddr arb_addr) {
+        const auto& scheduler = system.Scheduler(core_index);
+        const auto& thread_list = scheduler.GetThreadList();
+
+        for (const auto& thread : thread_list) {
+            if (thread->GetArbiterWaitAddress() == arb_addr) {
+                waiting_threads.push_back(thread);
+            }
+        }
+    };
+
+    // Retrieve all threads that are waiting for this address.
+    std::vector<SharedPtr<Thread>> threads;
+    RetrieveWaitingThreads(0, threads, address);
+    RetrieveWaitingThreads(1, threads, address);
+    RetrieveWaitingThreads(2, threads, address);
+    RetrieveWaitingThreads(3, threads, address);
+
+    // Sort them by priority, such that the highest priority ones come first.
+    std::sort(threads.begin(), threads.end(),
+              [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
+                  return lhs->GetPriority() < rhs->GetPriority();
+              });
+
+    return threads;
 }
-} // namespace Kernel::AddressArbiter
+} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
index b58f21bec..ed0d0e69f 100644
--- a/src/core/hle/kernel/address_arbiter.h
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -4,29 +4,77 @@

 #pragma once

+#include <vector>
+
 #include "common/common_types.h"
+#include "core/hle/kernel/object.h"

 union ResultCode;

-namespace Kernel::AddressArbiter {
+namespace Core {
+class System;
+}

-enum class ArbitrationType {
-    WaitIfLessThan = 0,
-    DecrementAndWaitIfLessThan = 1,
-    WaitIfEqual = 2,
-};
+namespace Kernel {

-enum class SignalType {
-    Signal = 0,
-    IncrementAndSignalIfEqual = 1,
-    ModifyByWaitingCountAndSignalIfEqual = 2,
-};
+class Thread;
+
+class AddressArbiter {
+public:
+    enum class ArbitrationType {
+        WaitIfLessThan = 0,
+        DecrementAndWaitIfLessThan = 1,
+        WaitIfEqual = 2,
+    };
+
+    enum class SignalType {
+        Signal = 0,
+        IncrementAndSignalIfEqual = 1,
+        ModifyByWaitingCountAndSignalIfEqual = 2,
+    };
+
+    explicit AddressArbiter(Core::System& system);
+    ~AddressArbiter();
+
+    AddressArbiter(const AddressArbiter&) = delete;
+    AddressArbiter& operator=(const AddressArbiter&) = delete;
+
+    AddressArbiter(AddressArbiter&&) = default;
+    AddressArbiter& operator=(AddressArbiter&&) = delete;
+
+    /// Signals an address being waited on with a particular signaling type.
+    ResultCode SignalToAddress(VAddr address, SignalType type, s32 value, s32 num_to_wake);

-ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
-ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
-ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
+    /// Waits on an address with a particular arbitration type.
+    ResultCode WaitForAddress(VAddr address, ArbitrationType type, s32 value, s64 timeout_ns);

-ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
-ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
+private:
+    /// Signals an address being waited on.
+    ResultCode SignalToAddressOnly(VAddr address, s32 num_to_wake);
+
+    /// Signals an address being waited on and increments its value if equal to the value argument.
+    ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
+
+    /// Signals an address being waited on and modifies its value based on waiting thread count if
+    /// equal to the value argument.
+    ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+                                                             s32 num_to_wake);
+
+    /// Waits on an address if the value passed is less than the argument value,
+    /// optionally decrementing.
+    ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout,
+                                        bool should_decrement);
+
+    /// Waits on an address if the value passed is equal to the argument value.
+    ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
+
+    // Waits on the given address with a timeout in nanoseconds
+    ResultCode WaitForAddressImpl(VAddr address, s64 timeout);
+
+    // Gets the threads waiting on an address.
+    std::vector<SharedPtr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const;
+
+    Core::System& system;
+};

-} // namespace Kernel::AddressArbiter
+} // namespace Kernel
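
Both address_arbiter files replace free functions that reached for Core::System::GetInstance() with a class that stores the Core::System& passed to its constructor. A self-contained sketch (toy types, not yuzu's) of that dependency-injection pattern:

#include <cstdio>

namespace toy {
class System; // forward declaration, as in address_arbiter.h / core_cpu.h

class AddressArbiterSketch {
public:
    explicit AddressArbiterSketch(System& system) : system{system} {}

    void Signal() const; // uses the injected `system` instead of a global singleton

private:
    System& system;
};

class System {
public:
    AddressArbiterSketch& Arbiter() {
        return arbiter;
    }

private:
    AddressArbiterSketch arbiter{*this};
};

void AddressArbiterSketch::Signal() const {
    std::puts("signaling through the injected System reference");
}
} // namespace toy

int main() {
    toy::System system;
    system.Arbiter().Signal();
    return 0;
}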
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index 704e82824..c17baa50a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -17,21 +17,11 @@ ClientSession::~ClientSession() {
     // This destructor will be called automatically when the last ClientSession handle is closed by
     // the emulated application.

-    // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they
+    // A local reference to the ServerSession is necessary to guarantee it
     // will be kept alive until after ClientDisconnected() returns.
     SharedPtr<ServerSession> server = parent->server;
     if (server) {
-        std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler;
-        if (hle_handler)
-            hle_handler->ClientDisconnected(server);
-
-        // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set
-        // their WaitSynchronization result to 0xC920181A.
-
-        // Clean up the list of client threads with pending requests, they are unneeded now that the
-        // client endpoint is closed.
-        server->pending_requesting_threads.clear();
-        server->currently_handling = nullptr;
+        server->ClientDisconnected();
     }

     parent->client = nullptr;
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h
index 4c18de69c..b1f39aad7 100644
--- a/src/core/hle/kernel/client_session.h
+++ b/src/core/hle/kernel/client_session.h
@@ -36,14 +36,15 @@ public:

     ResultCode SendSyncRequest(SharedPtr<Thread> thread);

-    std::string name; ///< Name of client port (optional)
+private:
+    explicit ClientSession(KernelCore& kernel);
+    ~ClientSession() override;

     /// The parent session, which links to the server endpoint.
     std::shared_ptr<Session> parent;

-private:
-    explicit ClientSession(KernelCore& kernel);
-    ~ClientSession() override;
+    /// Name of the client session (optional)
+    std::string name;
 };

 } // namespace Kernel
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 5dd855db8..fe710eb6e 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default;
 void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf,
                                            bool incoming) {
     IPC::RequestParser rp(src_cmdbuf);
-    command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>());
+    command_header = rp.PopRaw<IPC::CommandHeader>();

     if (command_header->type == IPC::CommandType::Close) {
         // Close does not populate the rest of the IPC header
@@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_

     // If handle descriptor is present, add size of it
     if (command_header->enable_handle_descriptor) {
-        handle_descriptor_header =
-            std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>());
+        handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>();
         if (handle_descriptor_header->send_current_pid) {
             rp.Skip(2, false);
         }
@@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_
         // If this is an incoming message, only CommandType "Request" has a domain header
         // All outgoing domain messages have the domain header, if only incoming has it
         if (incoming || domain_message_header) {
-            domain_message_header =
-                std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>());
+            domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>();
         } else {
-            if (Session()->IsDomain())
+            if (Session()->IsDomain()) {
                 LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!");
+            }
         }
     }

-    data_payload_header =
-        std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>());
+    data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>();

     data_payload_offset = rp.GetCurrentOffset();

@@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) {
         // Write the domain objects to the command buffer, these go after the raw untranslated data.
         // TODO(Subv): This completely ignores C buffers.
         std::size_t domain_offset = size - domain_message_header->num_objects;
-        auto& request_handlers = server_session->domain_request_handlers;

-        for (auto& object : domain_objects) {
-            request_handlers.emplace_back(object);
-            dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size());
+        for (const auto& object : domain_objects) {
+            server_session->AppendDomainRequestHandler(object);
+            dst_cmdbuf[domain_offset++] =
+                static_cast<u32_le>(server_session->NumDomainRequestHandlers());
         }
     }

diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index cb1c5aff3..2bdd9f02c 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <optional> | ||
| 9 | #include <string> | 10 | #include <string> |
| 10 | #include <type_traits> | 11 | #include <type_traits> |
| 11 | #include <vector> | 12 | #include <vector> |
| @@ -15,6 +16,8 @@ | |||
| 15 | #include "core/hle/ipc.h" | 16 | #include "core/hle/ipc.h" |
| 16 | #include "core/hle/kernel/object.h" | 17 | #include "core/hle/kernel/object.h" |
| 17 | 18 | ||
| 19 | union ResultCode; | ||
| 20 | |||
| 18 | namespace Service { | 21 | namespace Service { |
| 19 | class ServiceFrameworkBase; | 22 | class ServiceFrameworkBase; |
| 20 | } | 23 | } |
| @@ -166,12 +169,12 @@ public: | |||
| 166 | return buffer_c_desciptors; | 169 | return buffer_c_desciptors; |
| 167 | } | 170 | } |
| 168 | 171 | ||
| 169 | const IPC::DomainMessageHeader* GetDomainMessageHeader() const { | 172 | const IPC::DomainMessageHeader& GetDomainMessageHeader() const { |
| 170 | return domain_message_header.get(); | 173 | return domain_message_header.value(); |
| 171 | } | 174 | } |
| 172 | 175 | ||
| 173 | bool HasDomainMessageHeader() const { | 176 | bool HasDomainMessageHeader() const { |
| 174 | return domain_message_header != nullptr; | 177 | return domain_message_header.has_value(); |
| 175 | } | 178 | } |
| 176 | 179 | ||
| 177 | /// Helper function to read a buffer using the appropriate buffer descriptor | 180 | /// Helper function to read a buffer using the appropriate buffer descriptor |
| @@ -208,14 +211,12 @@ public: | |||
| 208 | 211 | ||
| 209 | template <typename T> | 212 | template <typename T> |
| 210 | SharedPtr<T> GetCopyObject(std::size_t index) { | 213 | SharedPtr<T> GetCopyObject(std::size_t index) { |
| 211 | ASSERT(index < copy_objects.size()); | 214 | return DynamicObjectCast<T>(copy_objects.at(index)); |
| 212 | return DynamicObjectCast<T>(copy_objects[index]); | ||
| 213 | } | 215 | } |
| 214 | 216 | ||
| 215 | template <typename T> | 217 | template <typename T> |
| 216 | SharedPtr<T> GetMoveObject(std::size_t index) { | 218 | SharedPtr<T> GetMoveObject(std::size_t index) { |
| 217 | ASSERT(index < move_objects.size()); | 219 | return DynamicObjectCast<T>(move_objects.at(index)); |
| 218 | return DynamicObjectCast<T>(move_objects[index]); | ||
| 219 | } | 220 | } |
| 220 | 221 | ||
| 221 | void AddMoveObject(SharedPtr<Object> object) { | 222 | void AddMoveObject(SharedPtr<Object> object) { |
| @@ -232,7 +233,7 @@ public: | |||
| 232 | 233 | ||
| 233 | template <typename T> | 234 | template <typename T> |
| 234 | std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { | 235 | std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { |
| 235 | return std::static_pointer_cast<T>(domain_request_handlers[index]); | 236 | return std::static_pointer_cast<T>(domain_request_handlers.at(index)); |
| 236 | } | 237 | } |
| 237 | 238 | ||
| 238 | void SetDomainRequestHandlers( | 239 | void SetDomainRequestHandlers( |
| @@ -272,10 +273,10 @@ private: | |||
| 272 | boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; | 273 | boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; |
| 273 | boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; | 274 | boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; |
| 274 | 275 | ||
| 275 | std::shared_ptr<IPC::CommandHeader> command_header; | 276 | std::optional<IPC::CommandHeader> command_header; |
| 276 | std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header; | 277 | std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header; |
| 277 | std::shared_ptr<IPC::DataPayloadHeader> data_payload_header; | 278 | std::optional<IPC::DataPayloadHeader> data_payload_header; |
| 278 | std::shared_ptr<IPC::DomainMessageHeader> domain_message_header; | 279 | std::optional<IPC::DomainMessageHeader> domain_message_header; |
| 279 | std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; | 280 | std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; |
| 280 | std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; | 281 | std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; |
| 281 | std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; | 282 | std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; |
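The hle_ipc.h hunk above swaps the shared_ptr-held IPC headers for std::optional and makes GetDomainMessageHeader() return a reference backed by value(), so callers are expected to check HasDomainMessageHeader() first. A minimal sketch of that guarded access pattern, assuming a Kernel::HLERequestContext& named ctx (the wrapper function itself is hypothetical; only the two accessors come from the hunk above):

    // Guarded access to the now-optional domain message header.
    void HandleRequest(Kernel::HLERequestContext& ctx) {
        if (!ctx.HasDomainMessageHeader()) {
            return;                                        // the std::optional member is empty
        }
        const auto& header = ctx.GetDomainMessageHeader(); // value() would throw if it were empty
        const u32 object_id{header.object_id};
        // ... dispatch on header.command, as server_session.cpp does further below ...
    }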
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index dd749eed4..4d224d01d 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | #include "core/core.h" | 13 | #include "core/core.h" |
| 14 | #include "core/core_timing.h" | 14 | #include "core/core_timing.h" |
| 15 | #include "core/hle/kernel/address_arbiter.h" | ||
| 15 | #include "core/hle/kernel/client_port.h" | 16 | #include "core/hle/kernel/client_port.h" |
| 16 | #include "core/hle/kernel/handle_table.h" | 17 | #include "core/hle/kernel/handle_table.h" |
| 17 | #include "core/hle/kernel/kernel.h" | 18 | #include "core/hle/kernel/kernel.h" |
| @@ -86,11 +87,13 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] int cycles_ | |||
| 86 | } | 87 | } |
| 87 | 88 | ||
| 88 | struct KernelCore::Impl { | 89 | struct KernelCore::Impl { |
| 89 | void Initialize(KernelCore& kernel, Core::Timing::CoreTiming& core_timing) { | 90 | explicit Impl(Core::System& system) : system{system} {} |
| 91 | |||
| 92 | void Initialize(KernelCore& kernel) { | ||
| 90 | Shutdown(); | 93 | Shutdown(); |
| 91 | 94 | ||
| 92 | InitializeSystemResourceLimit(kernel); | 95 | InitializeSystemResourceLimit(kernel); |
| 93 | InitializeThreads(core_timing); | 96 | InitializeThreads(); |
| 94 | } | 97 | } |
| 95 | 98 | ||
| 96 | void Shutdown() { | 99 | void Shutdown() { |
| @@ -122,9 +125,9 @@ struct KernelCore::Impl { | |||
| 122 | ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess()); | 125 | ASSERT(system_resource_limit->SetLimitValue(ResourceType::Sessions, 900).IsSuccess()); |
| 123 | } | 126 | } |
| 124 | 127 | ||
| 125 | void InitializeThreads(Core::Timing::CoreTiming& core_timing) { | 128 | void InitializeThreads() { |
| 126 | thread_wakeup_event_type = | 129 | thread_wakeup_event_type = |
| 127 | core_timing.RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); | 130 | system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); |
| 128 | } | 131 | } |
| 129 | 132 | ||
| 130 | std::atomic<u32> next_object_id{0}; | 133 | std::atomic<u32> next_object_id{0}; |
| @@ -145,15 +148,18 @@ struct KernelCore::Impl { | |||
| 145 | /// Map of named ports managed by the kernel, which can be retrieved using | 148 | /// Map of named ports managed by the kernel, which can be retrieved using |
| 146 | /// the ConnectToPort SVC. | 149 | /// the ConnectToPort SVC. |
| 147 | NamedPortTable named_ports; | 150 | NamedPortTable named_ports; |
| 151 | |||
| 152 | // System context | ||
| 153 | Core::System& system; | ||
| 148 | }; | 154 | }; |
| 149 | 155 | ||
| 150 | KernelCore::KernelCore() : impl{std::make_unique<Impl>()} {} | 156 | KernelCore::KernelCore(Core::System& system) : impl{std::make_unique<Impl>(system)} {} |
| 151 | KernelCore::~KernelCore() { | 157 | KernelCore::~KernelCore() { |
| 152 | Shutdown(); | 158 | Shutdown(); |
| 153 | } | 159 | } |
| 154 | 160 | ||
| 155 | void KernelCore::Initialize(Core::Timing::CoreTiming& core_timing) { | 161 | void KernelCore::Initialize() { |
| 156 | impl->Initialize(*this, core_timing); | 162 | impl->Initialize(*this); |
| 157 | } | 163 | } |
| 158 | 164 | ||
| 159 | void KernelCore::Shutdown() { | 165 | void KernelCore::Shutdown() { |
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 154bced42..ff17ff865 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h | |||
| @@ -11,6 +11,10 @@ | |||
| 11 | template <typename T> | 11 | template <typename T> |
| 12 | class ResultVal; | 12 | class ResultVal; |
| 13 | 13 | ||
| 14 | namespace Core { | ||
| 15 | class System; | ||
| 16 | } | ||
| 17 | |||
| 14 | namespace Core::Timing { | 18 | namespace Core::Timing { |
| 15 | class CoreTiming; | 19 | class CoreTiming; |
| 16 | struct EventType; | 20 | struct EventType; |
| @@ -18,6 +22,7 @@ struct EventType; | |||
| 18 | 22 | ||
| 19 | namespace Kernel { | 23 | namespace Kernel { |
| 20 | 24 | ||
| 25 | class AddressArbiter; | ||
| 21 | class ClientPort; | 26 | class ClientPort; |
| 22 | class HandleTable; | 27 | class HandleTable; |
| 23 | class Process; | 28 | class Process; |
| @@ -30,7 +35,14 @@ private: | |||
| 30 | using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; | 35 | using NamedPortTable = std::unordered_map<std::string, SharedPtr<ClientPort>>; |
| 31 | 36 | ||
| 32 | public: | 37 | public: |
| 33 | KernelCore(); | 38 | /// Constructs an instance of the kernel using the given System |
| 39 | /// instance as a context for any necessary system-related state, | ||
| 40 | /// such as threads, CPU core state, etc. | ||
| 41 | /// | ||
| 42 | /// @post After execution of the constructor, the provided System | ||
| 43 | /// object *must* outlive the kernel instance itself. | ||
| 44 | /// | ||
| 45 | explicit KernelCore(Core::System& system); | ||
| 34 | ~KernelCore(); | 46 | ~KernelCore(); |
| 35 | 47 | ||
| 36 | KernelCore(const KernelCore&) = delete; | 48 | KernelCore(const KernelCore&) = delete; |
| @@ -40,11 +52,7 @@ public: | |||
| 40 | KernelCore& operator=(KernelCore&&) = delete; | 52 | KernelCore& operator=(KernelCore&&) = delete; |
| 41 | 53 | ||
| 42 | /// Resets the kernel to a clean slate for use. | 54 | /// Resets the kernel to a clean slate for use. |
| 43 | /// | 55 | void Initialize(); |
| 44 | /// @param core_timing CoreTiming instance used to create any necessary | ||
| 45 | /// kernel-specific callback events. | ||
| 46 | /// | ||
| 47 | void Initialize(Core::Timing::CoreTiming& core_timing); | ||
| 48 | 56 | ||
| 49 | /// Clears all resources in use by the kernel instance. | 57 | /// Clears all resources in use by the kernel instance. |
| 50 | void Shutdown(); | 58 | void Shutdown(); |
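Taken together, the kernel.cpp and kernel.h hunks above make KernelCore carry a Core::System reference from construction onward: Initialize() loses its CoreTiming parameter and the Impl registers ThreadWakeupCallback through system.CoreTiming() instead. A sketch of the updated construction order under the new signatures (KernelCore is normally owned by Core::System itself, so the standalone object here is illustrative only):

    Core::System& system = Core::System::GetInstance(); // accessor already used elsewhere in this diff
    Kernel::KernelCore kernel{system};  // per the @post note, 'system' must outlive 'kernel'
    kernel.Initialize();                // wakeup event is registered via system.CoreTiming()
    // ...
    kernel.Shutdown();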
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 8009150e0..7e8ba978c 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -53,9 +53,10 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_poi | |||
| 53 | CodeSet::CodeSet() = default; | 53 | CodeSet::CodeSet() = default; |
| 54 | CodeSet::~CodeSet() = default; | 54 | CodeSet::~CodeSet() = default; |
| 55 | 55 | ||
| 56 | SharedPtr<Process> Process::Create(KernelCore& kernel, std::string&& name) { | 56 | SharedPtr<Process> Process::Create(Core::System& system, std::string&& name) { |
| 57 | SharedPtr<Process> process(new Process(kernel)); | 57 | auto& kernel = system.Kernel(); |
| 58 | 58 | ||
| 59 | SharedPtr<Process> process(new Process(system)); | ||
| 59 | process->name = std::move(name); | 60 | process->name = std::move(name); |
| 60 | process->resource_limit = kernel.GetSystemResourceLimit(); | 61 | process->resource_limit = kernel.GetSystemResourceLimit(); |
| 61 | process->status = ProcessStatus::Created; | 62 | process->status = ProcessStatus::Created; |
| @@ -233,8 +234,8 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) { | |||
| 233 | Core::System::GetInstance().ArmInterface(3).ClearInstructionCache(); | 234 | Core::System::GetInstance().ArmInterface(3).ClearInstructionCache(); |
| 234 | } | 235 | } |
| 235 | 236 | ||
| 236 | Kernel::Process::Process(KernelCore& kernel) : WaitObject{kernel} {} | 237 | Process::Process(Core::System& system) : WaitObject{system.Kernel()}, address_arbiter{system} {} |
| 237 | Kernel::Process::~Process() {} | 238 | Process::~Process() = default; |
| 238 | 239 | ||
| 239 | void Process::Acquire(Thread* thread) { | 240 | void Process::Acquire(Thread* thread) { |
| 240 | ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); | 241 | ASSERT_MSG(!ShouldWait(thread), "Object unavailable!"); |
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index dcc57ae9f..2a132c894 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h | |||
| @@ -12,12 +12,17 @@ | |||
| 12 | #include <vector> | 12 | #include <vector> |
| 13 | #include <boost/container/static_vector.hpp> | 13 | #include <boost/container/static_vector.hpp> |
| 14 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 15 | #include "core/hle/kernel/address_arbiter.h" | ||
| 15 | #include "core/hle/kernel/handle_table.h" | 16 | #include "core/hle/kernel/handle_table.h" |
| 16 | #include "core/hle/kernel/process_capability.h" | 17 | #include "core/hle/kernel/process_capability.h" |
| 17 | #include "core/hle/kernel/vm_manager.h" | 18 | #include "core/hle/kernel/vm_manager.h" |
| 18 | #include "core/hle/kernel/wait_object.h" | 19 | #include "core/hle/kernel/wait_object.h" |
| 19 | #include "core/hle/result.h" | 20 | #include "core/hle/result.h" |
| 20 | 21 | ||
| 22 | namespace Core { | ||
| 23 | class System; | ||
| 24 | } | ||
| 25 | |||
| 21 | namespace FileSys { | 26 | namespace FileSys { |
| 22 | class ProgramMetadata; | 27 | class ProgramMetadata; |
| 23 | } | 28 | } |
| @@ -116,7 +121,7 @@ public: | |||
| 116 | 121 | ||
| 117 | static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; | 122 | static constexpr std::size_t RANDOM_ENTROPY_SIZE = 4; |
| 118 | 123 | ||
| 119 | static SharedPtr<Process> Create(KernelCore& kernel, std::string&& name); | 124 | static SharedPtr<Process> Create(Core::System& system, std::string&& name); |
| 120 | 125 | ||
| 121 | std::string GetTypeName() const override { | 126 | std::string GetTypeName() const override { |
| 122 | return "Process"; | 127 | return "Process"; |
| @@ -150,6 +155,16 @@ public: | |||
| 150 | return handle_table; | 155 | return handle_table; |
| 151 | } | 156 | } |
| 152 | 157 | ||
| 158 | /// Gets a reference to the process' address arbiter. | ||
| 159 | AddressArbiter& GetAddressArbiter() { | ||
| 160 | return address_arbiter; | ||
| 161 | } | ||
| 162 | |||
| 163 | /// Gets a const reference to the process' address arbiter. | ||
| 164 | const AddressArbiter& GetAddressArbiter() const { | ||
| 165 | return address_arbiter; | ||
| 166 | } | ||
| 167 | |||
| 153 | /// Gets the current status of the process | 168 | /// Gets the current status of the process |
| 154 | ProcessStatus GetStatus() const { | 169 | ProcessStatus GetStatus() const { |
| 155 | return status; | 170 | return status; |
| @@ -251,7 +266,7 @@ public: | |||
| 251 | void FreeTLSSlot(VAddr tls_address); | 266 | void FreeTLSSlot(VAddr tls_address); |
| 252 | 267 | ||
| 253 | private: | 268 | private: |
| 254 | explicit Process(KernelCore& kernel); | 269 | explicit Process(Core::System& kernel); |
| 255 | ~Process() override; | 270 | ~Process() override; |
| 256 | 271 | ||
| 257 | /// Checks if the specified thread should wait until this process is available. | 272 | /// Checks if the specified thread should wait until this process is available. |
| @@ -309,6 +324,9 @@ private: | |||
| 309 | /// Per-process handle table for storing created object handles in. | 324 | /// Per-process handle table for storing created object handles in. |
| 310 | HandleTable handle_table; | 325 | HandleTable handle_table; |
| 311 | 326 | ||
| 327 | /// Per-process address arbiter. | ||
| 328 | AddressArbiter address_arbiter; | ||
| 329 | |||
| 312 | /// Random values for svcGetInfo RandomEntropy | 330 | /// Random values for svcGetInfo RandomEntropy |
| 313 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; | 331 | std::array<u64, RANDOM_ENTROPY_SIZE> random_entropy; |
| 314 | 332 | ||
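The process.cpp/process.h hunks above route Process creation through Core::System and give every Process its own AddressArbiter, exposed via GetAddressArbiter(). A short usage sketch built only from the signatures shown in this diff (error handling omitted; the variable names are illustrative):

    auto process = Kernel::Process::Create(system, "main"); // was Create(kernel, ...)
    auto& arbiter = process->GetAddressArbiter();           // per-process arbiter, consumed by svc.cpp further below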
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 44f30d070..5fccfd9f4 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp | |||
| @@ -19,7 +19,8 @@ namespace Kernel { | |||
| 19 | 19 | ||
| 20 | std::mutex Scheduler::scheduler_mutex; | 20 | std::mutex Scheduler::scheduler_mutex; |
| 21 | 21 | ||
| 22 | Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {} | 22 | Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core) |
| 23 | : cpu_core{cpu_core}, system{system} {} | ||
| 23 | 24 | ||
| 24 | Scheduler::~Scheduler() { | 25 | Scheduler::~Scheduler() { |
| 25 | for (auto& thread : thread_list) { | 26 | for (auto& thread : thread_list) { |
| @@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() { | |||
| 61 | 62 | ||
| 62 | void Scheduler::SwitchContext(Thread* new_thread) { | 63 | void Scheduler::SwitchContext(Thread* new_thread) { |
| 63 | Thread* const previous_thread = GetCurrentThread(); | 64 | Thread* const previous_thread = GetCurrentThread(); |
| 64 | Process* const previous_process = Core::CurrentProcess(); | 65 | Process* const previous_process = system.Kernel().CurrentProcess(); |
| 65 | 66 | ||
| 66 | UpdateLastContextSwitchTime(previous_thread, previous_process); | 67 | UpdateLastContextSwitchTime(previous_thread, previous_process); |
| 67 | 68 | ||
| @@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) { | |||
| 94 | 95 | ||
| 95 | auto* const thread_owner_process = current_thread->GetOwnerProcess(); | 96 | auto* const thread_owner_process = current_thread->GetOwnerProcess(); |
| 96 | if (previous_process != thread_owner_process) { | 97 | if (previous_process != thread_owner_process) { |
| 97 | Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process); | 98 | system.Kernel().MakeCurrentProcess(thread_owner_process); |
| 98 | SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table); | 99 | SetCurrentPageTable(&thread_owner_process->VMManager().page_table); |
| 99 | } | 100 | } |
| 100 | 101 | ||
| 101 | cpu_core.LoadContext(new_thread->GetContext()); | 102 | cpu_core.LoadContext(new_thread->GetContext()); |
| @@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) { | |||
| 111 | 112 | ||
| 112 | void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { | 113 | void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { |
| 113 | const u64 prev_switch_ticks = last_context_switch_time; | 114 | const u64 prev_switch_ticks = last_context_switch_time; |
| 114 | const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); | 115 | const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); |
| 115 | const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; | 116 | const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; |
| 116 | 117 | ||
| 117 | if (thread != nullptr) { | 118 | if (thread != nullptr) { |
| @@ -223,8 +224,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) { | |||
| 223 | // Take the first non-nullptr one | 224 | // Take the first non-nullptr one |
| 224 | for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { | 225 | for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { |
| 225 | const auto res = | 226 | const auto res = |
| 226 | Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread( | 227 | system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority); |
| 227 | core, priority); | ||
| 228 | 228 | ||
| 229 | // If scheduler provides a suggested thread | 229 | // If scheduler provides a suggested thread |
| 230 | if (res != nullptr) { | 230 | if (res != nullptr) { |
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 97ced4dfc..1c5bf57d9 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h | |||
| @@ -13,7 +13,8 @@ | |||
| 13 | 13 | ||
| 14 | namespace Core { | 14 | namespace Core { |
| 15 | class ARM_Interface; | 15 | class ARM_Interface; |
| 16 | } | 16 | class System; |
| 17 | } // namespace Core | ||
| 17 | 18 | ||
| 18 | namespace Kernel { | 19 | namespace Kernel { |
| 19 | 20 | ||
| @@ -21,7 +22,7 @@ class Process; | |||
| 21 | 22 | ||
| 22 | class Scheduler final { | 23 | class Scheduler final { |
| 23 | public: | 24 | public: |
| 24 | explicit Scheduler(Core::ARM_Interface& cpu_core); | 25 | explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core); |
| 25 | ~Scheduler(); | 26 | ~Scheduler(); |
| 26 | 27 | ||
| 27 | /// Returns whether there are any threads that are ready to run. | 28 | /// Returns whether there are any threads that are ready to run. |
| @@ -162,6 +163,7 @@ private: | |||
| 162 | Core::ARM_Interface& cpu_core; | 163 | Core::ARM_Interface& cpu_core; |
| 163 | u64 last_context_switch_time = 0; | 164 | u64 last_context_switch_time = 0; |
| 164 | 165 | ||
| 166 | Core::System& system; | ||
| 165 | static std::mutex scheduler_mutex; | 167 | static std::mutex scheduler_mutex; |
| 166 | }; | 168 | }; |
| 167 | 169 | ||
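The scheduler hunks replace Core::System::GetInstance() lookups in SwitchContext(), UpdateLastContextSwitchTime() and YieldWithLoadBalancing() with a Core::System& stored at construction time. A sketch of per-core construction under the new two-argument signature ('system' and 'cpu_core' are assumed to be supplied by the caller, e.g. the per-core CPU wrapper):

    // One scheduler per CPU core; the stored reference replaces the global lookups.
    Kernel::Scheduler scheduler{system, cpu_core};
    const u64 ticks = system.CoreTiming().GetTicks(); // same accessor the scheduler now uses internally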
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 027434f92..4d8a337a7 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp | |||
| @@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) { | |||
| 63 | pending_requesting_threads.pop_back(); | 63 | pending_requesting_threads.pop_back(); |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | void ServerSession::ClientDisconnected() { | ||
| 67 | // We keep a shared pointer to the hle handler to keep it alive throughout | ||
| 68 | // the call to ClientDisconnected, as ClientDisconnected invalidates the | ||
| 69 | // hle_handler member itself during the course of the function executing. | ||
| 70 | std::shared_ptr<SessionRequestHandler> handler = hle_handler; | ||
| 71 | if (handler) { | ||
| 72 | // Note that after this returns, this server session's hle_handler is | ||
| 73 | // invalidated (set to null). | ||
| 74 | handler->ClientDisconnected(this); | ||
| 75 | } | ||
| 76 | |||
| 77 | // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set | ||
| 78 | // their WaitSynchronization result to 0xC920181A. | ||
| 79 | |||
| 80 | // Clean up the list of client threads with pending requests, they are unneeded now that the | ||
| 81 | // client endpoint is closed. | ||
| 82 | pending_requesting_threads.clear(); | ||
| 83 | currently_handling = nullptr; | ||
| 84 | } | ||
| 85 | |||
| 86 | void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) { | ||
| 87 | domain_request_handlers.push_back(std::move(handler)); | ||
| 88 | } | ||
| 89 | |||
| 90 | std::size_t ServerSession::NumDomainRequestHandlers() const { | ||
| 91 | return domain_request_handlers.size(); | ||
| 92 | } | ||
| 93 | |||
| 66 | ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { | 94 | ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { |
| 67 | auto* const domain_message_header = context.GetDomainMessageHeader(); | 95 | if (!context.HasDomainMessageHeader()) { |
| 68 | if (domain_message_header) { | 96 | return RESULT_SUCCESS; |
| 69 | // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs | 97 | } |
| 70 | context.SetDomainRequestHandlers(domain_request_handlers); | 98 | |
| 71 | 99 | // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs | |
| 72 | // If there is a DomainMessageHeader, then this is CommandType "Request" | 100 | context.SetDomainRequestHandlers(domain_request_handlers); |
| 73 | const u32 object_id{context.GetDomainMessageHeader()->object_id}; | 101 | |
| 74 | switch (domain_message_header->command) { | 102 | // If there is a DomainMessageHeader, then this is CommandType "Request" |
| 75 | case IPC::DomainMessageHeader::CommandType::SendMessage: | 103 | const auto& domain_message_header = context.GetDomainMessageHeader(); |
| 76 | if (object_id > domain_request_handlers.size()) { | 104 | const u32 object_id{domain_message_header.object_id}; |
| 77 | LOG_CRITICAL(IPC, | 105 | switch (domain_message_header.command) { |
| 78 | "object_id {} is too big! This probably means a recent service call " | 106 | case IPC::DomainMessageHeader::CommandType::SendMessage: |
| 79 | "to {} needed to return a new interface!", | 107 | if (object_id > domain_request_handlers.size()) { |
| 80 | object_id, name); | 108 | LOG_CRITICAL(IPC, |
| 81 | UNREACHABLE(); | 109 | "object_id {} is too big! This probably means a recent service call " |
| 82 | return RESULT_SUCCESS; // Ignore error if asserts are off | 110 | "to {} needed to return a new interface!", |
| 83 | } | 111 | object_id, name); |
| 84 | return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); | 112 | UNREACHABLE(); |
| 85 | 113 | return RESULT_SUCCESS; // Ignore error if asserts are off | |
| 86 | case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: { | ||
| 87 | LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id); | ||
| 88 | |||
| 89 | domain_request_handlers[object_id - 1] = nullptr; | ||
| 90 | |||
| 91 | IPC::ResponseBuilder rb{context, 2}; | ||
| 92 | rb.Push(RESULT_SUCCESS); | ||
| 93 | return RESULT_SUCCESS; | ||
| 94 | } | ||
| 95 | } | 114 | } |
| 115 | return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); | ||
| 96 | 116 | ||
| 97 | LOG_CRITICAL(IPC, "Unknown domain command={}", | 117 | case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: { |
| 98 | static_cast<int>(domain_message_header->command.Value())); | 118 | LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id); |
| 99 | ASSERT(false); | 119 | |
| 120 | domain_request_handlers[object_id - 1] = nullptr; | ||
| 121 | |||
| 122 | IPC::ResponseBuilder rb{context, 2}; | ||
| 123 | rb.Push(RESULT_SUCCESS); | ||
| 124 | return RESULT_SUCCESS; | ||
| 125 | } | ||
| 100 | } | 126 | } |
| 101 | 127 | ||
| 128 | LOG_CRITICAL(IPC, "Unknown domain command={}", | ||
| 129 | static_cast<int>(domain_message_header.command.Value())); | ||
| 130 | ASSERT(false); | ||
| 102 | return RESULT_SUCCESS; | 131 | return RESULT_SUCCESS; |
| 103 | } | 132 | } |
| 104 | 133 | ||
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index e0e9d64c8..aea4ccfeb 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h | |||
| @@ -46,6 +46,14 @@ public: | |||
| 46 | return HANDLE_TYPE; | 46 | return HANDLE_TYPE; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | Session* GetParent() { | ||
| 50 | return parent.get(); | ||
| 51 | } | ||
| 52 | |||
| 53 | const Session* GetParent() const { | ||
| 54 | return parent.get(); | ||
| 55 | } | ||
| 56 | |||
| 49 | using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; | 57 | using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; |
| 50 | 58 | ||
| 51 | /** | 59 | /** |
| @@ -78,23 +86,16 @@ public: | |||
| 78 | 86 | ||
| 79 | void Acquire(Thread* thread) override; | 87 | void Acquire(Thread* thread) override; |
| 80 | 88 | ||
| 81 | std::string name; ///< The name of this session (optional) | 89 | /// Called when a client disconnection occurs. |
| 82 | std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint. | 90 | void ClientDisconnected(); |
| 83 | std::shared_ptr<SessionRequestHandler> | ||
| 84 | hle_handler; ///< This session's HLE request handler (applicable when not a domain) | ||
| 85 | 91 | ||
| 86 | /// This is the list of domain request handlers (after conversion to a domain) | 92 | /// Adds a new domain request handler to the collection of request handlers within |
| 87 | std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; | 93 | /// this ServerSession instance. |
| 88 | 94 | void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler); | |
| 89 | /// List of threads that are pending a response after a sync request. This list is processed in | ||
| 90 | /// a LIFO manner, thus, the last request will be dispatched first. | ||
| 91 | /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test. | ||
| 92 | std::vector<SharedPtr<Thread>> pending_requesting_threads; | ||
| 93 | 95 | ||
| 94 | /// Thread whose request is currently being handled. A request is considered "handled" when a | 96 | /// Retrieves the total number of domain request handlers that have been |
| 95 | /// response is sent via svcReplyAndReceive. | 97 | /// appended to this ServerSession instance. |
| 96 | /// TODO(Subv): Find a better name for this. | 98 | std::size_t NumDomainRequestHandlers() const; |
| 97 | SharedPtr<Thread> currently_handling; | ||
| 98 | 99 | ||
| 99 | /// Returns true if the session has been converted to a domain, otherwise False | 100 | /// Returns true if the session has been converted to a domain, otherwise False |
| 100 | bool IsDomain() const { | 101 | bool IsDomain() const { |
| @@ -129,8 +130,30 @@ private: | |||
| 129 | /// object handle. | 130 | /// object handle. |
| 130 | ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); | 131 | ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); |
| 131 | 132 | ||
| 133 | /// The parent session, which links to the client endpoint. | ||
| 134 | std::shared_ptr<Session> parent; | ||
| 135 | |||
| 136 | /// This session's HLE request handler (applicable when not a domain) | ||
| 137 | std::shared_ptr<SessionRequestHandler> hle_handler; | ||
| 138 | |||
| 139 | /// This is the list of domain request handlers (after conversion to a domain) | ||
| 140 | std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; | ||
| 141 | |||
| 142 | /// List of threads that are pending a response after a sync request. This list is processed in | ||
| 143 | /// a LIFO manner, thus, the last request will be dispatched first. | ||
| 144 | /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test. | ||
| 145 | std::vector<SharedPtr<Thread>> pending_requesting_threads; | ||
| 146 | |||
| 147 | /// Thread whose request is currently being handled. A request is considered "handled" when a | ||
| 148 | /// response is sent via svcReplyAndReceive. | ||
| 149 | /// TODO(Subv): Find a better name for this. | ||
| 150 | SharedPtr<Thread> currently_handling; | ||
| 151 | |||
| 132 | /// When set to True, converts the session to a domain at the end of the command | 152 | /// When set to True, converts the session to a domain at the end of the command |
| 133 | bool convert_to_domain{}; | 153 | bool convert_to_domain{}; |
| 154 | |||
| 155 | /// The name of this session (optional) | ||
| 156 | std::string name; | ||
| 134 | }; | 157 | }; |
| 135 | 158 | ||
| 136 | } // namespace Kernel | 159 | } // namespace Kernel |
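With the server_session.h hunk above, the parent/handler/thread bookkeeping becomes private state, so service-side code is expected to go through the new member functions rather than the former public fields. A hedged call-site sketch, assuming a SharedPtr<ServerSession> named session and a std::shared_ptr<SessionRequestHandler> named handler (both hypothetical here):

    session->AppendDomainRequestHandler(std::move(handler));
    const std::size_t handler_count = session->NumDomainRequestHandlers();
    if (auto* parent = session->GetParent()) {
        // follow the link to the client endpoint, e.g. to see whether it is still connected
    }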
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index 22d0c1dd5..62861da36 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "core/core.h" | ||
| 10 | #include "core/hle/kernel/errors.h" | 9 | #include "core/hle/kernel/errors.h" |
| 11 | #include "core/hle/kernel/kernel.h" | 10 | #include "core/hle/kernel/kernel.h" |
| 12 | #include "core/hle/kernel/shared_memory.h" | 11 | #include "core/hle/kernel/shared_memory.h" |
| @@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_ | |||
| 34 | shared_memory->backing_block_offset = 0; | 33 | shared_memory->backing_block_offset = 0; |
| 35 | 34 | ||
| 36 | // Refresh the address mappings for the current process. | 35 | // Refresh the address mappings for the current process. |
| 37 | if (Core::CurrentProcess() != nullptr) { | 36 | if (kernel.CurrentProcess() != nullptr) { |
| 38 | Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings( | 37 | kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings( |
| 39 | shared_memory->backing_block.get()); | 38 | shared_memory->backing_block.get()); |
| 40 | } | 39 | } |
| 41 | } else { | 40 | } else { |
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index c5d399bab..77d0e3d96 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "core/hle/kernel/address_arbiter.h" | 20 | #include "core/hle/kernel/address_arbiter.h" |
| 21 | #include "core/hle/kernel/client_port.h" | 21 | #include "core/hle/kernel/client_port.h" |
| 22 | #include "core/hle/kernel/client_session.h" | 22 | #include "core/hle/kernel/client_session.h" |
| 23 | #include "core/hle/kernel/errors.h" | ||
| 23 | #include "core/hle/kernel/handle_table.h" | 24 | #include "core/hle/kernel/handle_table.h" |
| 24 | #include "core/hle/kernel/kernel.h" | 25 | #include "core/hle/kernel/kernel.h" |
| 25 | #include "core/hle/kernel/mutex.h" | 26 | #include "core/hle/kernel/mutex.h" |
| @@ -47,23 +48,6 @@ constexpr bool IsValidAddressRange(VAddr address, u64 size) { | |||
| 47 | return address + size > address; | 48 | return address + size > address; |
| 48 | } | 49 | } |
| 49 | 50 | ||
| 50 | // Checks if a given address range lies within a larger address range. | ||
| 51 | constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin, | ||
| 52 | VAddr address_range_end) { | ||
| 53 | const VAddr end_address = address + size - 1; | ||
| 54 | return address_range_begin <= address && end_address <= address_range_end - 1; | ||
| 55 | } | ||
| 56 | |||
| 57 | bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) { | ||
| 58 | return IsInsideAddressRange(address, size, vm.GetAddressSpaceBaseAddress(), | ||
| 59 | vm.GetAddressSpaceEndAddress()); | ||
| 60 | } | ||
| 61 | |||
| 62 | bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) { | ||
| 63 | return IsInsideAddressRange(address, size, vm.GetNewMapRegionBaseAddress(), | ||
| 64 | vm.GetNewMapRegionEndAddress()); | ||
| 65 | } | ||
| 66 | |||
| 67 | // 8 GiB | 51 | // 8 GiB |
| 68 | constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; | 52 | constexpr u64 MAIN_MEMORY_SIZE = 0x200000000; |
| 69 | 53 | ||
| @@ -105,14 +89,14 @@ ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_add | |||
| 105 | return ERR_INVALID_ADDRESS_STATE; | 89 | return ERR_INVALID_ADDRESS_STATE; |
| 106 | } | 90 | } |
| 107 | 91 | ||
| 108 | if (!IsInsideAddressSpace(vm_manager, src_addr, size)) { | 92 | if (!vm_manager.IsWithinAddressSpace(src_addr, size)) { |
| 109 | LOG_ERROR(Kernel_SVC, | 93 | LOG_ERROR(Kernel_SVC, |
| 110 | "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", | 94 | "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", |
| 111 | src_addr, size); | 95 | src_addr, size); |
| 112 | return ERR_INVALID_ADDRESS_STATE; | 96 | return ERR_INVALID_ADDRESS_STATE; |
| 113 | } | 97 | } |
| 114 | 98 | ||
| 115 | if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) { | 99 | if (!vm_manager.IsWithinNewMapRegion(dst_addr, size)) { |
| 116 | LOG_ERROR(Kernel_SVC, | 100 | LOG_ERROR(Kernel_SVC, |
| 117 | "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", | 101 | "Destination is not within the new map region, addr=0x{:016X}, size=0x{:016X}", |
| 118 | dst_addr, size); | 102 | dst_addr, size); |
| @@ -238,7 +222,7 @@ static ResultCode SetMemoryPermission(VAddr addr, u64 size, u32 prot) { | |||
| 238 | auto* const current_process = Core::CurrentProcess(); | 222 | auto* const current_process = Core::CurrentProcess(); |
| 239 | auto& vm_manager = current_process->VMManager(); | 223 | auto& vm_manager = current_process->VMManager(); |
| 240 | 224 | ||
| 241 | if (!IsInsideAddressSpace(vm_manager, addr, size)) { | 225 | if (!vm_manager.IsWithinAddressSpace(addr, size)) { |
| 242 | LOG_ERROR(Kernel_SVC, | 226 | LOG_ERROR(Kernel_SVC, |
| 243 | "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, | 227 | "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, |
| 244 | size); | 228 | size); |
| @@ -299,7 +283,7 @@ static ResultCode SetMemoryAttribute(VAddr address, u64 size, u32 mask, u32 attr | |||
| 299 | } | 283 | } |
| 300 | 284 | ||
| 301 | auto& vm_manager = Core::CurrentProcess()->VMManager(); | 285 | auto& vm_manager = Core::CurrentProcess()->VMManager(); |
| 302 | if (!IsInsideAddressSpace(vm_manager, address, size)) { | 286 | if (!vm_manager.IsWithinAddressSpace(address, size)) { |
| 303 | LOG_ERROR(Kernel_SVC, | 287 | LOG_ERROR(Kernel_SVC, |
| 304 | "Given address (0x{:016X}) is outside the bounds of the address space.", address); | 288 | "Given address (0x{:016X}) is outside the bounds of the address space.", address); |
| 305 | return ERR_INVALID_ADDRESS_STATE; | 289 | return ERR_INVALID_ADDRESS_STATE; |
| @@ -1495,20 +1479,10 @@ static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout | |||
| 1495 | return ERR_INVALID_ADDRESS; | 1479 | return ERR_INVALID_ADDRESS; |
| 1496 | } | 1480 | } |
| 1497 | 1481 | ||
| 1498 | switch (static_cast<AddressArbiter::ArbitrationType>(type)) { | 1482 | const auto arbitration_type = static_cast<AddressArbiter::ArbitrationType>(type); |
| 1499 | case AddressArbiter::ArbitrationType::WaitIfLessThan: | 1483 | auto& address_arbiter = |
| 1500 | return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false); | 1484 | Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter(); |
| 1501 | case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan: | 1485 | return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); |
| 1502 | return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true); | ||
| 1503 | case AddressArbiter::ArbitrationType::WaitIfEqual: | ||
| 1504 | return AddressArbiter::WaitForAddressIfEqual(address, value, timeout); | ||
| 1505 | default: | ||
| 1506 | LOG_ERROR(Kernel_SVC, | ||
| 1507 | "Invalid arbitration type, expected WaitIfLessThan, DecrementAndWaitIfLessThan " | ||
| 1508 | "or WaitIfEqual but got {}", | ||
| 1509 | type); | ||
| 1510 | return ERR_INVALID_ENUM_VALUE; | ||
| 1511 | } | ||
| 1512 | } | 1486 | } |
| 1513 | 1487 | ||
| 1514 | // Signals to an address (via Address Arbiter) | 1488 | // Signals to an address (via Address Arbiter) |
| @@ -1526,21 +1500,10 @@ static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to | |||
| 1526 | return ERR_INVALID_ADDRESS; | 1500 | return ERR_INVALID_ADDRESS; |
| 1527 | } | 1501 | } |
| 1528 | 1502 | ||
| 1529 | switch (static_cast<AddressArbiter::SignalType>(type)) { | 1503 | const auto signal_type = static_cast<AddressArbiter::SignalType>(type); |
| 1530 | case AddressArbiter::SignalType::Signal: | 1504 | auto& address_arbiter = |
| 1531 | return AddressArbiter::SignalToAddress(address, num_to_wake); | 1505 | Core::System::GetInstance().Kernel().CurrentProcess()->GetAddressArbiter(); |
| 1532 | case AddressArbiter::SignalType::IncrementAndSignalIfEqual: | 1506 | return address_arbiter.SignalToAddress(address, signal_type, value, num_to_wake); |
| 1533 | return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake); | ||
| 1534 | case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual: | ||
| 1535 | return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value, | ||
| 1536 | num_to_wake); | ||
| 1537 | default: | ||
| 1538 | LOG_ERROR(Kernel_SVC, | ||
| 1539 | "Invalid signal type, expected Signal, IncrementAndSignalIfEqual " | ||
| 1540 | "or ModifyByWaitingCountAndSignalIfEqual but got {}", | ||
| 1541 | type); | ||
| 1542 | return ERR_INVALID_ENUM_VALUE; | ||
| 1543 | } | ||
| 1544 | } | 1507 | } |
| 1545 | 1508 | ||
| 1546 | /// This returns the total CPU ticks elapsed since the CPU was powered-on | 1509 | /// This returns the total CPU ticks elapsed since the CPU was powered-on |
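Both SVC wrappers above now forward to the current process' AddressArbiter with enum-typed arguments instead of switching over static functions; the dispatch presumably moves into address_arbiter.cpp, which is not part of this excerpt. A reconstruction of what that member-function dispatch would look like, derived from the deleted switch in svc.cpp (the member signatures are assumptions; only the enum values and ERR_INVALID_ENUM_VALUE appear in this diff):

    // Hypothetical AddressArbiter member mirroring the removed svc.cpp switch.
    ResultCode AddressArbiter::WaitForAddress(VAddr address, ArbitrationType type, s32 value,
                                              s64 timeout) {
        switch (type) {
        case ArbitrationType::WaitIfLessThan:
            return WaitForAddressIfLessThan(address, value, timeout, false);
        case ArbitrationType::DecrementAndWaitIfLessThan:
            return WaitForAddressIfLessThan(address, value, timeout, true);
        case ArbitrationType::WaitIfEqual:
            return WaitForAddressIfEqual(address, value, timeout);
        default:
            return ERR_INVALID_ENUM_VALUE; // the original also logged the bad value
        }
    }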
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 6661e2130..eb54d6651 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -184,8 +184,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name | |||
| 184 | return ERR_INVALID_PROCESSOR_ID; | 184 | return ERR_INVALID_PROCESSOR_ID; |
| 185 | } | 185 | } |
| 186 | 186 | ||
| 187 | // TODO(yuriks): Other checks, returning 0xD9001BEA | ||
| 188 | |||
| 189 | if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { | 187 | if (!Memory::IsValidVirtualAddress(owner_process, entry_point)) { |
| 190 | LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); | 188 | LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:016X}", name, entry_point); |
| 191 | // TODO (bunnei): Find the correct error code to use here | 189 | // TODO (bunnei): Find the correct error code to use here |
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 10ad94aa6..05c59af34 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp | |||
| @@ -17,8 +17,8 @@ | |||
| 17 | #include "core/memory_setup.h" | 17 | #include "core/memory_setup.h" |
| 18 | 18 | ||
| 19 | namespace Kernel { | 19 | namespace Kernel { |
| 20 | 20 | namespace { | |
| 21 | static const char* GetMemoryStateName(MemoryState state) { | 21 | const char* GetMemoryStateName(MemoryState state) { |
| 22 | static constexpr const char* names[] = { | 22 | static constexpr const char* names[] = { |
| 23 | "Unmapped", "Io", | 23 | "Unmapped", "Io", |
| 24 | "Normal", "CodeStatic", | 24 | "Normal", "CodeStatic", |
| @@ -35,6 +35,14 @@ static const char* GetMemoryStateName(MemoryState state) { | |||
| 35 | return names[ToSvcMemoryState(state)]; | 35 | return names[ToSvcMemoryState(state)]; |
| 36 | } | 36 | } |
| 37 | 37 | ||
| 38 | // Checks if a given address range lies within a larger address range. | ||
| 39 | constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin, | ||
| 40 | VAddr address_range_end) { | ||
| 41 | const VAddr end_address = address + size - 1; | ||
| 42 | return address_range_begin <= address && end_address <= address_range_end - 1; | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 38 | bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { | 46 | bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const { |
| 39 | ASSERT(base + size == next.base); | 47 | ASSERT(base + size == next.base); |
| 40 | if (permissions != next.permissions || state != next.state || attribute != next.attribute || | 48 | if (permissions != next.permissions || state != next.state || attribute != next.attribute || |
| @@ -249,8 +257,7 @@ ResultCode VMManager::ReprotectRange(VAddr target, u64 size, VMAPermission new_p | |||
| 249 | } | 257 | } |
| 250 | 258 | ||
| 251 | ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { | 259 | ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission perms) { |
| 252 | if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || | 260 | if (!IsWithinHeapRegion(target, size)) { |
| 253 | target + size < target) { | ||
| 254 | return ERR_INVALID_ADDRESS; | 261 | return ERR_INVALID_ADDRESS; |
| 255 | } | 262 | } |
| 256 | 263 | ||
| @@ -285,8 +292,7 @@ ResultVal<VAddr> VMManager::HeapAllocate(VAddr target, u64 size, VMAPermission p | |||
| 285 | } | 292 | } |
| 286 | 293 | ||
| 287 | ResultCode VMManager::HeapFree(VAddr target, u64 size) { | 294 | ResultCode VMManager::HeapFree(VAddr target, u64 size) { |
| 288 | if (target < GetHeapRegionBaseAddress() || target + size > GetHeapRegionEndAddress() || | 295 | if (!IsWithinHeapRegion(target, size)) { |
| 289 | target + size < target) { | ||
| 290 | return ERR_INVALID_ADDRESS; | 296 | return ERR_INVALID_ADDRESS; |
| 291 | } | 297 | } |
| 292 | 298 | ||
| @@ -706,6 +712,11 @@ u64 VMManager::GetAddressSpaceWidth() const { | |||
| 706 | return address_space_width; | 712 | return address_space_width; |
| 707 | } | 713 | } |
| 708 | 714 | ||
| 715 | bool VMManager::IsWithinAddressSpace(VAddr address, u64 size) const { | ||
| 716 | return IsInsideAddressRange(address, size, GetAddressSpaceBaseAddress(), | ||
| 717 | GetAddressSpaceEndAddress()); | ||
| 718 | } | ||
| 719 | |||
| 709 | VAddr VMManager::GetASLRRegionBaseAddress() const { | 720 | VAddr VMManager::GetASLRRegionBaseAddress() const { |
| 710 | return aslr_region_base; | 721 | return aslr_region_base; |
| 711 | } | 722 | } |
| @@ -750,6 +761,11 @@ u64 VMManager::GetCodeRegionSize() const { | |||
| 750 | return code_region_end - code_region_base; | 761 | return code_region_end - code_region_base; |
| 751 | } | 762 | } |
| 752 | 763 | ||
| 764 | bool VMManager::IsWithinCodeRegion(VAddr address, u64 size) const { | ||
| 765 | return IsInsideAddressRange(address, size, GetCodeRegionBaseAddress(), | ||
| 766 | GetCodeRegionEndAddress()); | ||
| 767 | } | ||
| 768 | |||
| 753 | VAddr VMManager::GetHeapRegionBaseAddress() const { | 769 | VAddr VMManager::GetHeapRegionBaseAddress() const { |
| 754 | return heap_region_base; | 770 | return heap_region_base; |
| 755 | } | 771 | } |
| @@ -762,6 +778,11 @@ u64 VMManager::GetHeapRegionSize() const { | |||
| 762 | return heap_region_end - heap_region_base; | 778 | return heap_region_end - heap_region_base; |
| 763 | } | 779 | } |
| 764 | 780 | ||
| 781 | bool VMManager::IsWithinHeapRegion(VAddr address, u64 size) const { | ||
| 782 | return IsInsideAddressRange(address, size, GetHeapRegionBaseAddress(), | ||
| 783 | GetHeapRegionEndAddress()); | ||
| 784 | } | ||
| 785 | |||
| 765 | VAddr VMManager::GetMapRegionBaseAddress() const { | 786 | VAddr VMManager::GetMapRegionBaseAddress() const { |
| 766 | return map_region_base; | 787 | return map_region_base; |
| 767 | } | 788 | } |
| @@ -774,6 +795,10 @@ u64 VMManager::GetMapRegionSize() const { | |||
| 774 | return map_region_end - map_region_base; | 795 | return map_region_end - map_region_base; |
| 775 | } | 796 | } |
| 776 | 797 | ||
| 798 | bool VMManager::IsWithinMapRegion(VAddr address, u64 size) const { | ||
| 799 | return IsInsideAddressRange(address, size, GetMapRegionBaseAddress(), GetMapRegionEndAddress()); | ||
| 800 | } | ||
| 801 | |||
| 777 | VAddr VMManager::GetNewMapRegionBaseAddress() const { | 802 | VAddr VMManager::GetNewMapRegionBaseAddress() const { |
| 778 | return new_map_region_base; | 803 | return new_map_region_base; |
| 779 | } | 804 | } |
| @@ -786,6 +811,11 @@ u64 VMManager::GetNewMapRegionSize() const { | |||
| 786 | return new_map_region_end - new_map_region_base; | 811 | return new_map_region_end - new_map_region_base; |
| 787 | } | 812 | } |
| 788 | 813 | ||
| 814 | bool VMManager::IsWithinNewMapRegion(VAddr address, u64 size) const { | ||
| 815 | return IsInsideAddressRange(address, size, GetNewMapRegionBaseAddress(), | ||
| 816 | GetNewMapRegionEndAddress()); | ||
| 817 | } | ||
| 818 | |||
| 789 | VAddr VMManager::GetTLSIORegionBaseAddress() const { | 819 | VAddr VMManager::GetTLSIORegionBaseAddress() const { |
| 790 | return tls_io_region_base; | 820 | return tls_io_region_base; |
| 791 | } | 821 | } |
| @@ -798,4 +828,9 @@ u64 VMManager::GetTLSIORegionSize() const { | |||
| 798 | return tls_io_region_end - tls_io_region_base; | 828 | return tls_io_region_end - tls_io_region_base; |
| 799 | } | 829 | } |
| 800 | 830 | ||
| 831 | bool VMManager::IsWithinTLSIORegion(VAddr address, u64 size) const { | ||
| 832 | return IsInsideAddressRange(address, size, GetTLSIORegionBaseAddress(), | ||
| 833 | GetTLSIORegionEndAddress()); | ||
| 834 | } | ||
| 835 | |||
| 801 | } // namespace Kernel | 836 | } // namespace Kernel |
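The IsInsideAddressRange() helper moved into the anonymous namespace treats both ends as inclusive (end_address = address + size - 1, compared against address_range_end - 1), and the new IsWithin* predicates wrap it once per region so HeapAllocate()/HeapFree() can state their precondition in a single call. A small worked example of the boundary arithmetic plus the resulting call-site shape ('vm_manager', 'target' and 'size' are assumed to be in scope; the heap bounds are made-up values):

    // Assume GetHeapRegionBaseAddress() == 0x1000 and GetHeapRegionEndAddress() == 0x2000.
    //   target = 0x1F00, size = 0x100 -> last byte 0x1FFF -> within the region
    //   target = 0x1F00, size = 0x101 -> last byte 0x2000 -> rejected
    //   target = 0x0FFF, size = 0x010 -> starts below the base -> rejected
    if (!vm_manager.IsWithinHeapRegion(target, size)) {
        return ERR_INVALID_ADDRESS; // same rejection HeapAllocate()/HeapFree() now perform
    }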
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 6091533bc..88e0b3c02 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h | |||
| @@ -432,18 +432,21 @@ public: | |||
| 432 | /// Gets the address space width in bits. | 432 | /// Gets the address space width in bits. |
| 433 | u64 GetAddressSpaceWidth() const; | 433 | u64 GetAddressSpaceWidth() const; |
| 434 | 434 | ||
| 435 | /// Determines whether or not the given address range lies within the address space. | ||
| 436 | bool IsWithinAddressSpace(VAddr address, u64 size) const; | ||
| 437 | |||
| 435 | /// Gets the base address of the ASLR region. | 438 | /// Gets the base address of the ASLR region. |
| 436 | VAddr GetASLRRegionBaseAddress() const; | 439 | VAddr GetASLRRegionBaseAddress() const; |
| 437 | 440 | ||
| 438 | /// Gets the end address of the ASLR region. | 441 | /// Gets the end address of the ASLR region. |
| 439 | VAddr GetASLRRegionEndAddress() const; | 442 | VAddr GetASLRRegionEndAddress() const; |
| 440 | 443 | ||
| 441 | /// Determines whether or not the specified address range is within the ASLR region. | ||
| 442 | bool IsWithinASLRRegion(VAddr address, u64 size) const; | ||
| 443 | |||
| 444 | /// Gets the size of the ASLR region | 444 | /// Gets the size of the ASLR region |
| 445 | u64 GetASLRRegionSize() const; | 445 | u64 GetASLRRegionSize() const; |
| 446 | 446 | ||
| 447 | /// Determines whether or not the specified address range is within the ASLR region. | ||
| 448 | bool IsWithinASLRRegion(VAddr address, u64 size) const; | ||
| 449 | |||
| 447 | /// Gets the base address of the code region. | 450 | /// Gets the base address of the code region. |
| 448 | VAddr GetCodeRegionBaseAddress() const; | 451 | VAddr GetCodeRegionBaseAddress() const; |
| 449 | 452 | ||
| @@ -453,6 +456,9 @@ public: | |||
| 453 | /// Gets the total size of the code region in bytes. | 456 | /// Gets the total size of the code region in bytes. |
| 454 | u64 GetCodeRegionSize() const; | 457 | u64 GetCodeRegionSize() const; |
| 455 | 458 | ||
| 459 | /// Determines whether or not the specified range is within the code region. | ||
| 460 | bool IsWithinCodeRegion(VAddr address, u64 size) const; | ||
| 461 | |||
| 456 | /// Gets the base address of the heap region. | 462 | /// Gets the base address of the heap region. |
| 457 | VAddr GetHeapRegionBaseAddress() const; | 463 | VAddr GetHeapRegionBaseAddress() const; |
| 458 | 464 | ||
| @@ -462,6 +468,9 @@ public: | |||
| 462 | /// Gets the total size of the heap region in bytes. | 468 | /// Gets the total size of the heap region in bytes. |
| 463 | u64 GetHeapRegionSize() const; | 469 | u64 GetHeapRegionSize() const; |
| 464 | 470 | ||
| 471 | /// Determines whether or not the specified range is within the heap region. | ||
| 472 | bool IsWithinHeapRegion(VAddr address, u64 size) const; | ||
| 473 | |||
| 465 | /// Gets the base address of the map region. | 474 | /// Gets the base address of the map region. |
| 466 | VAddr GetMapRegionBaseAddress() const; | 475 | VAddr GetMapRegionBaseAddress() const; |
| 467 | 476 | ||
| @@ -471,6 +480,9 @@ public: | |||
| 471 | /// Gets the total size of the map region in bytes. | 480 | /// Gets the total size of the map region in bytes. |
| 472 | u64 GetMapRegionSize() const; | 481 | u64 GetMapRegionSize() const; |
| 473 | 482 | ||
| 483 | /// Determines whether or not the specified range is within the map region. | ||
| 484 | bool IsWithinMapRegion(VAddr address, u64 size) const; | ||
| 485 | |||
| 474 | /// Gets the base address of the new map region. | 486 | /// Gets the base address of the new map region. |
| 475 | VAddr GetNewMapRegionBaseAddress() const; | 487 | VAddr GetNewMapRegionBaseAddress() const; |
| 476 | 488 | ||
| @@ -480,6 +492,9 @@ public: | |||
| 480 | /// Gets the total size of the new map region in bytes. | 492 | /// Gets the total size of the new map region in bytes. |
| 481 | u64 GetNewMapRegionSize() const; | 493 | u64 GetNewMapRegionSize() const; |
| 482 | 494 | ||
| 495 | /// Determines whether or not the given address range is within the new map region | ||
| 496 | bool IsWithinNewMapRegion(VAddr address, u64 size) const; | ||
| 497 | |||
| 483 | /// Gets the base address of the TLS IO region. | 498 | /// Gets the base address of the TLS IO region. |
| 484 | VAddr GetTLSIORegionBaseAddress() const; | 499 | VAddr GetTLSIORegionBaseAddress() const; |
| 485 | 500 | ||
| @@ -489,6 +504,9 @@ public: | |||
| 489 | /// Gets the total size of the TLS IO region in bytes. | 504 | /// Gets the total size of the TLS IO region in bytes. |
| 490 | u64 GetTLSIORegionSize() const; | 505 | u64 GetTLSIORegionSize() const; |
| 491 | 506 | ||
| 507 | /// Determines if the given address range is within the TLS IO region. | ||
| 508 | bool IsWithinTLSIORegion(VAddr address, u64 size) const; | ||
| 509 | |||
| 492 | /// Each VMManager has its own page table, which is set as the main one when the owning process | 510 | /// Each VMManager has its own page table, which is set as the main one when the owning process |
| 493 | /// is scheduled. | 511 | /// is scheduled. |
| 494 | Memory::PageTable page_table; | 512 | Memory::PageTable page_table; |
diff --git a/src/core/hle/result.h b/src/core/hle/result.h index bfb77cc31..1ed144481 100644 --- a/src/core/hle/result.h +++ b/src/core/hle/result.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 13 | 12 | ||
| 14 | // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes | 13 | // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes |
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index f255f74b5..8c5bd6059 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "common/string_util.h" | 7 | #include "common/string_util.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/frontend/applets/software_keyboard.h" | 9 | #include "core/frontend/applets/software_keyboard.h" |
| 10 | #include "core/hle/result.h" | ||
| 10 | #include "core/hle/service/am/am.h" | 11 | #include "core/hle/service/am/am.h" |
| 11 | #include "core/hle/service/am/applets/software_keyboard.h" | 12 | #include "core/hle/service/am/applets/software_keyboard.h" |
| 12 | 13 | ||
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h index efd5753a1..b93a30d28 100644 --- a/src/core/hle/service/am/applets/software_keyboard.h +++ b/src/core/hle/service/am/applets/software_keyboard.h | |||
| @@ -9,10 +9,13 @@ | |||
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | ||
| 12 | #include "common/swap.h" | 13 | #include "common/swap.h" |
| 13 | #include "core/hle/service/am/am.h" | 14 | #include "core/hle/service/am/am.h" |
| 14 | #include "core/hle/service/am/applets/applets.h" | 15 | #include "core/hle/service/am/applets/applets.h" |
| 15 | 16 | ||
| 17 | union ResultCode; | ||
| 18 | |||
| 16 | namespace Service::AM::Applets { | 19 | namespace Service::AM::Applets { |
| 17 | 20 | ||
| 18 | enum class KeysetDisable : u32 { | 21 | enum class KeysetDisable : u32 { |
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index 6831c0735..21f5e64c7 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp | |||
| @@ -18,17 +18,11 @@ | |||
| 18 | #include "core/hle/kernel/readable_event.h" | 18 | #include "core/hle/kernel/readable_event.h" |
| 19 | #include "core/hle/kernel/writable_event.h" | 19 | #include "core/hle/kernel/writable_event.h" |
| 20 | #include "core/hle/service/audio/audout_u.h" | 20 | #include "core/hle/service/audio/audout_u.h" |
| 21 | #include "core/hle/service/audio/errors.h" | ||
| 21 | #include "core/memory.h" | 22 | #include "core/memory.h" |
| 22 | 23 | ||
| 23 | namespace Service::Audio { | 24 | namespace Service::Audio { |
| 24 | 25 | ||
| 25 | namespace ErrCodes { | ||
| 26 | enum { | ||
| 27 | ErrorUnknown = 2, | ||
| 28 | BufferCountExceeded = 8, | ||
| 29 | }; | ||
| 30 | } | ||
| 31 | |||
| 32 | constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; | 26 | constexpr std::array<char, 10> DefaultDevice{{"DeviceOut"}}; |
| 33 | constexpr int DefaultSampleRate{48000}; | 27 | constexpr int DefaultSampleRate{48000}; |
| 34 | 28 | ||
| @@ -100,7 +94,7 @@ private: | |||
| 100 | 94 | ||
| 101 | if (stream->IsPlaying()) { | 95 | if (stream->IsPlaying()) { |
| 102 | IPC::ResponseBuilder rb{ctx, 2}; | 96 | IPC::ResponseBuilder rb{ctx, 2}; |
| 103 | rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::ErrorUnknown)); | 97 | rb.Push(ERR_OPERATION_FAILED); |
| 104 | return; | 98 | return; |
| 105 | } | 99 | } |
| 106 | 100 | ||
| @@ -113,7 +107,9 @@ private: | |||
| 113 | void StopAudioOut(Kernel::HLERequestContext& ctx) { | 107 | void StopAudioOut(Kernel::HLERequestContext& ctx) { |
| 114 | LOG_DEBUG(Service_Audio, "called"); | 108 | LOG_DEBUG(Service_Audio, "called"); |
| 115 | 109 | ||
| 116 | audio_core.StopStream(stream); | 110 | if (stream->IsPlaying()) { |
| 111 | audio_core.StopStream(stream); | ||
| 112 | } | ||
| 117 | 113 | ||
| 118 | IPC::ResponseBuilder rb{ctx, 2}; | 114 | IPC::ResponseBuilder rb{ctx, 2}; |
| 119 | rb.Push(RESULT_SUCCESS); | 115 | rb.Push(RESULT_SUCCESS); |
| @@ -143,7 +139,8 @@ private: | |||
| 143 | 139 | ||
| 144 | if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { | 140 | if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) { |
| 145 | IPC::ResponseBuilder rb{ctx, 2}; | 141 | IPC::ResponseBuilder rb{ctx, 2}; |
| 146 | rb.Push(ResultCode(ErrorModule::Audio, ErrCodes::BufferCountExceeded)); | 142 | rb.Push(ERR_BUFFER_COUNT_EXCEEDED); |
| 143 | return; | ||
| 147 | } | 144 | } |
| 148 | 145 | ||
| 149 | IPC::ResponseBuilder rb{ctx, 2}; | 146 | IPC::ResponseBuilder rb{ctx, 2}; |
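Two behavioral fixes are folded into the audout_u.cpp hunk above: StopAudioOut() only calls StopStream() when the stream is actually playing, and the QueueBuffer() failure path gains the return it was previously missing, so a failed queue no longer falls through and writes a second, successful response on the same context. The corrected error path, in isolation (this restates the hunk; nothing new is introduced):

    if (!audio_core.QueueBuffer(stream, tag, std::move(samples))) {
        IPC::ResponseBuilder rb{ctx, 2};
        rb.Push(ERR_BUFFER_COUNT_EXCEEDED);
        return; // without this, the RESULT_SUCCESS response below would also be written
    }
    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(RESULT_SUCCESS);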
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp index ea8f9d0bb..c9de10a24 100644 --- a/src/core/hle/service/audio/audren_u.cpp +++ b/src/core/hle/service/audio/audren_u.cpp | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "core/hle/kernel/readable_event.h" | 17 | #include "core/hle/kernel/readable_event.h" |
| 18 | #include "core/hle/kernel/writable_event.h" | 18 | #include "core/hle/kernel/writable_event.h" |
| 19 | #include "core/hle/service/audio/audren_u.h" | 19 | #include "core/hle/service/audio/audren_u.h" |
| 20 | #include "core/hle/service/audio/errors.h" | ||
| 20 | 21 | ||
| 21 | namespace Service::Audio { | 22 | namespace Service::Audio { |
| 22 | 23 | ||
| @@ -146,7 +147,7 @@ private: | |||
| 146 | // code in this case. | 147 | // code in this case. |
| 147 | 148 | ||
| 148 | IPC::ResponseBuilder rb{ctx, 2}; | 149 | IPC::ResponseBuilder rb{ctx, 2}; |
| 149 | rb.Push(ResultCode{ErrorModule::Audio, 201}); | 150 | rb.Push(ERR_NOT_SUPPORTED); |
| 150 | } | 151 | } |
| 151 | 152 | ||
| 152 | Kernel::EventPair system_event; | 153 | Kernel::EventPair system_event; |
diff --git a/src/core/hle/service/audio/errors.h b/src/core/hle/service/audio/errors.h new file mode 100644 index 000000000..6f8c09bcf --- /dev/null +++ b/src/core/hle/service/audio/errors.h | |||
| @@ -0,0 +1,15 @@ | |||
| 1 | // Copyright 2019 yuzu emulator team | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "core/hle/result.h" | ||
| 8 | |||
| 9 | namespace Service::Audio { | ||
| 10 | |||
| 11 | constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::Audio, 2}; | ||
| 12 | constexpr ResultCode ERR_BUFFER_COUNT_EXCEEDED{ErrorModule::Audio, 8}; | ||
| 13 | constexpr ResultCode ERR_NOT_SUPPORTED{ErrorModule::Audio, 513}; | ||
| 14 | |||
| 15 | } // namespace Service::Audio | ||
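The new errors.h centralizes the audio-service ResultCodes that audout_u.cpp and audren_u.cpp previously built inline from raw numbers. Pushing them stays a one-liner on the response builder, and comparisons against them become readable; a sketch, assuming a request context 'ctx' and a previously obtained 'result' (ResultCode equality comparison is assumed to be available, as it is used elsewhere in the codebase):

    #include "core/hle/service/audio/errors.h"

    // Pushing a named error, as audout_u.cpp now does:
    IPC::ResponseBuilder rb{ctx, 2};
    rb.Push(Service::Audio::ERR_BUFFER_COUNT_EXCEEDED);

    // Caller-side checks compare against a named constant instead of a magic number:
    if (result == Service::Audio::ERR_NOT_SUPPORTED) {
        // handle the unsupported operation
    }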
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index 11eba4a12..377e12cfa 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp | |||
| @@ -9,43 +9,32 @@ | |||
| 9 | 9 | ||
| 10 | #include <opus.h> | 10 | #include <opus.h> |
| 11 | 11 | ||
| 12 | #include "common/common_funcs.h" | 12 | #include "common/assert.h" |
| 13 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| 14 | #include "core/hle/ipc_helpers.h" | 14 | #include "core/hle/ipc_helpers.h" |
| 15 | #include "core/hle/kernel/hle_ipc.h" | 15 | #include "core/hle/kernel/hle_ipc.h" |
| 16 | #include "core/hle/service/audio/hwopus.h" | 16 | #include "core/hle/service/audio/hwopus.h" |
| 17 | 17 | ||
| 18 | namespace Service::Audio { | 18 | namespace Service::Audio { |
| 19 | 19 | namespace { | |
| 20 | struct OpusDeleter { | 20 | struct OpusDeleter { |
| 21 | void operator()(void* ptr) const { | 21 | void operator()(void* ptr) const { |
| 22 | operator delete(ptr); | 22 | operator delete(ptr); |
| 23 | } | 23 | } |
| 24 | }; | 24 | }; |
| 25 | 25 | ||
| 26 | class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { | 26 | using OpusDecoderPtr = std::unique_ptr<OpusDecoder, OpusDeleter>; |
| 27 | public: | ||
| 28 | IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoder, OpusDeleter> decoder, u32 sample_rate, | ||
| 29 | u32 channel_count) | ||
| 30 | : ServiceFramework("IHardwareOpusDecoderManager"), decoder(std::move(decoder)), | ||
| 31 | sample_rate(sample_rate), channel_count(channel_count) { | ||
| 32 | // clang-format off | ||
| 33 | static const FunctionInfo functions[] = { | ||
| 34 | {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"}, | ||
| 35 | {1, nullptr, "SetContext"}, | ||
| 36 | {2, nullptr, "DecodeInterleavedForMultiStreamOld"}, | ||
| 37 | {3, nullptr, "SetContextForMultiStream"}, | ||
| 38 | {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, | ||
| 39 | {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"}, | ||
| 40 | {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, | ||
| 41 | {7, nullptr, "DecodeInterleavedForMultiStream"}, | ||
| 42 | }; | ||
| 43 | // clang-format on | ||
| 44 | 27 | ||
| 45 | RegisterHandlers(functions); | 28 | struct OpusPacketHeader { |
| 46 | } | 29 | // Packet size in bytes. |
| 30 | u32_be size; | ||
| 31 | // Indicates the final range of the codec's entropy coder. | ||
| 32 | u32_be final_range; | ||
| 33 | }; | ||
| 34 | static_assert(sizeof(OpusPacketHeader) == 0x8, "OpusPacketHeader is an invalid size"); | ||
| 47 | 35 | ||
| 48 | private: | 36 | class OpusDecoderStateBase { |
| 37 | public: | ||
| 49 | /// Describes extra behavior that may be asked of the decoding context. | 38 | /// Describes extra behavior that may be asked of the decoding context. |
| 50 | enum class ExtraBehavior { | 39 | enum class ExtraBehavior { |
| 51 | /// No extra behavior. | 40 | /// No extra behavior. |
| @@ -55,30 +44,36 @@ private: | |||
| 55 | ResetContext, | 44 | ResetContext, |
| 56 | }; | 45 | }; |
| 57 | 46 | ||
| 58 | void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { | 47 | enum class PerfTime { |
| 59 | LOG_DEBUG(Audio, "called"); | 48 | Disabled, |
| 60 | 49 | Enabled, | |
| 61 | DecodeInterleavedHelper(ctx, nullptr, ExtraBehavior::None); | 50 | }; |
| 62 | } | ||
| 63 | |||
| 64 | void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) { | ||
| 65 | LOG_DEBUG(Audio, "called"); | ||
| 66 | 51 | ||
| 67 | u64 performance = 0; | 52 | virtual ~OpusDecoderStateBase() = default; |
| 68 | DecodeInterleavedHelper(ctx, &performance, ExtraBehavior::None); | ||
| 69 | } | ||
| 70 | 53 | ||
| 71 | void DecodeInterleaved(Kernel::HLERequestContext& ctx) { | 54 | // Decodes interleaved Opus packets. Optionally allows reporting time taken to |
| 72 | LOG_DEBUG(Audio, "called"); | 55 | // perform the decoding, as well as any relevant extra behavior. |
| 73 | 56 | virtual void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time, | |
| 74 | IPC::RequestParser rp{ctx}; | 57 | ExtraBehavior extra_behavior) = 0; |
| 75 | const auto extra_behavior = | 58 | }; |
| 76 | rp.Pop<bool>() ? ExtraBehavior::ResetContext : ExtraBehavior::None; | ||
| 77 | 59 | ||
| 78 | u64 performance = 0; | 60 | // Represents the decoder state for a non-multistream decoder. |
| 79 | DecodeInterleavedHelper(ctx, &performance, extra_behavior); | 61 | class OpusDecoderState final : public OpusDecoderStateBase { |
| 62 | public: | ||
| 63 | explicit OpusDecoderState(OpusDecoderPtr decoder, u32 sample_rate, u32 channel_count) | ||
| 64 | : decoder{std::move(decoder)}, sample_rate{sample_rate}, channel_count{channel_count} {} | ||
| 65 | |||
| 66 | void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time, | ||
| 67 | ExtraBehavior extra_behavior) override { | ||
| 68 | if (perf_time == PerfTime::Disabled) { | ||
| 69 | DecodeInterleavedHelper(ctx, nullptr, extra_behavior); | ||
| 70 | } else { | ||
| 71 | u64 performance = 0; | ||
| 72 | DecodeInterleavedHelper(ctx, &performance, extra_behavior); | ||
| 73 | } | ||
| 80 | } | 74 | } |
| 81 | 75 | ||
| 76 | private: | ||
| 82 | void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, | 77 | void DecodeInterleavedHelper(Kernel::HLERequestContext& ctx, u64* performance, |
| 83 | ExtraBehavior extra_behavior) { | 78 | ExtraBehavior extra_behavior) { |
| 84 | u32 consumed = 0; | 79 | u32 consumed = 0; |
| @@ -89,8 +84,7 @@ private: | |||
| 89 | ResetDecoderContext(); | 84 | ResetDecoderContext(); |
| 90 | } | 85 | } |
| 91 | 86 | ||
| 92 | if (!Decoder_DecodeInterleaved(consumed, sample_count, ctx.ReadBuffer(), samples, | 87 | if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { |
| 93 | performance)) { | ||
| 94 | LOG_ERROR(Audio, "Failed to decode opus data"); | 88 | LOG_ERROR(Audio, "Failed to decode opus data"); |
| 95 | IPC::ResponseBuilder rb{ctx, 2}; | 89 | IPC::ResponseBuilder rb{ctx, 2}; |
| 96 | // TODO(ogniK): Use correct error code | 90 | // TODO(ogniK): Use correct error code |
| @@ -109,27 +103,27 @@ private: | |||
| 109 | ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); | 103 | ctx.WriteBuffer(samples.data(), samples.size() * sizeof(s16)); |
| 110 | } | 104 | } |
| 111 | 105 | ||
| 112 | bool Decoder_DecodeInterleaved(u32& consumed, u32& sample_count, const std::vector<u8>& input, | 106 | bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector<u8>& input, |
| 113 | std::vector<opus_int16>& output, u64* out_performance_time) { | 107 | std::vector<opus_int16>& output, u64* out_performance_time) const { |
| 114 | const auto start_time = std::chrono::high_resolution_clock::now(); | 108 | const auto start_time = std::chrono::high_resolution_clock::now(); |
| 115 | const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); | 109 | const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); |
| 116 | if (sizeof(OpusHeader) > input.size()) { | 110 | if (sizeof(OpusPacketHeader) > input.size()) { |
| 117 | LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", | 111 | LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", |
| 118 | sizeof(OpusHeader), input.size()); | 112 | sizeof(OpusPacketHeader), input.size()); |
| 119 | return false; | 113 | return false; |
| 120 | } | 114 | } |
| 121 | 115 | ||
| 122 | OpusHeader hdr{}; | 116 | OpusPacketHeader hdr{}; |
| 123 | std::memcpy(&hdr, input.data(), sizeof(OpusHeader)); | 117 | std::memcpy(&hdr, input.data(), sizeof(OpusPacketHeader)); |
| 124 | if (sizeof(OpusHeader) + static_cast<u32>(hdr.sz) > input.size()) { | 118 | if (sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size) > input.size()) { |
| 125 | LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}", | 119 | LOG_ERROR(Audio, "Input does not fit in the opus header size. data_sz={}, input_sz={}", |
| 126 | sizeof(OpusHeader) + static_cast<u32>(hdr.sz), input.size()); | 120 | sizeof(OpusPacketHeader) + static_cast<u32>(hdr.size), input.size()); |
| 127 | return false; | 121 | return false; |
| 128 | } | 122 | } |
| 129 | 123 | ||
| 130 | const auto frame = input.data() + sizeof(OpusHeader); | 124 | const auto frame = input.data() + sizeof(OpusPacketHeader); |
| 131 | const auto decoded_sample_count = opus_packet_get_nb_samples( | 125 | const auto decoded_sample_count = opus_packet_get_nb_samples( |
| 132 | frame, static_cast<opus_int32>(input.size() - sizeof(OpusHeader)), | 126 | frame, static_cast<opus_int32>(input.size() - sizeof(OpusPacketHeader)), |
| 133 | static_cast<opus_int32>(sample_rate)); | 127 | static_cast<opus_int32>(sample_rate)); |
| 134 | if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { | 128 | if (decoded_sample_count * channel_count * sizeof(u16) > raw_output_sz) { |
| 135 | LOG_ERROR( | 129 | LOG_ERROR( |
| @@ -141,18 +135,18 @@ private: | |||
| 141 | 135 | ||
| 142 | const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); | 136 | const int frame_size = (static_cast<int>(raw_output_sz / sizeof(s16) / channel_count)); |
| 143 | const auto out_sample_count = | 137 | const auto out_sample_count = |
| 144 | opus_decode(decoder.get(), frame, hdr.sz, output.data(), frame_size, 0); | 138 | opus_decode(decoder.get(), frame, hdr.size, output.data(), frame_size, 0); |
| 145 | if (out_sample_count < 0) { | 139 | if (out_sample_count < 0) { |
| 146 | LOG_ERROR(Audio, | 140 | LOG_ERROR(Audio, |
| 147 | "Incorrect sample count received from opus_decode, " | 141 | "Incorrect sample count received from opus_decode, " |
| 148 | "output_sample_count={}, frame_size={}, data_sz_from_hdr={}", | 142 | "output_sample_count={}, frame_size={}, data_sz_from_hdr={}", |
| 149 | out_sample_count, frame_size, static_cast<u32>(hdr.sz)); | 143 | out_sample_count, frame_size, static_cast<u32>(hdr.size)); |
| 150 | return false; | 144 | return false; |
| 151 | } | 145 | } |
| 152 | 146 | ||
| 153 | const auto end_time = std::chrono::high_resolution_clock::now() - start_time; | 147 | const auto end_time = std::chrono::high_resolution_clock::now() - start_time; |
| 154 | sample_count = out_sample_count; | 148 | sample_count = out_sample_count; |
| 155 | consumed = static_cast<u32>(sizeof(OpusHeader) + hdr.sz); | 149 | consumed = static_cast<u32>(sizeof(OpusPacketHeader) + hdr.size); |
| 156 | if (out_performance_time != nullptr) { | 150 | if (out_performance_time != nullptr) { |
| 157 | *out_performance_time = | 151 | *out_performance_time = |
| 158 | std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); | 152 | std::chrono::duration_cast<std::chrono::milliseconds>(end_time).count(); |
| @@ -167,21 +161,66 @@ private: | |||
| 167 | opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); | 161 | opus_decoder_ctl(decoder.get(), OPUS_RESET_STATE); |
| 168 | } | 162 | } |
| 169 | 163 | ||
| 170 | struct OpusHeader { | 164 | OpusDecoderPtr decoder; |
| 171 | u32_be sz; // Needs to be BE for some odd reason | ||
| 172 | INSERT_PADDING_WORDS(1); | ||
| 173 | }; | ||
| 174 | static_assert(sizeof(OpusHeader) == 0x8, "OpusHeader is an invalid size"); | ||
| 175 | |||
| 176 | std::unique_ptr<OpusDecoder, OpusDeleter> decoder; | ||
| 177 | u32 sample_rate; | 165 | u32 sample_rate; |
| 178 | u32 channel_count; | 166 | u32 channel_count; |
| 179 | }; | 167 | }; |
| 180 | 168 | ||
| 181 | static std::size_t WorkerBufferSize(u32 channel_count) { | 169 | class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { |
| 170 | public: | ||
| 171 | explicit IHardwareOpusDecoderManager(std::unique_ptr<OpusDecoderStateBase> decoder_state) | ||
| 172 | : ServiceFramework("IHardwareOpusDecoderManager"), decoder_state{std::move(decoder_state)} { | ||
| 173 | // clang-format off | ||
| 174 | static const FunctionInfo functions[] = { | ||
| 175 | {0, &IHardwareOpusDecoderManager::DecodeInterleavedOld, "DecodeInterleavedOld"}, | ||
| 176 | {1, nullptr, "SetContext"}, | ||
| 177 | {2, nullptr, "DecodeInterleavedForMultiStreamOld"}, | ||
| 178 | {3, nullptr, "SetContextForMultiStream"}, | ||
| 179 | {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, | ||
| 180 | {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"}, | ||
| 181 | {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, | ||
| 182 | {7, nullptr, "DecodeInterleavedForMultiStream"}, | ||
| 183 | }; | ||
| 184 | // clang-format on | ||
| 185 | |||
| 186 | RegisterHandlers(functions); | ||
| 187 | } | ||
| 188 | |||
| 189 | private: | ||
| 190 | void DecodeInterleavedOld(Kernel::HLERequestContext& ctx) { | ||
| 191 | LOG_DEBUG(Audio, "called"); | ||
| 192 | |||
| 193 | decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Disabled, | ||
| 194 | OpusDecoderStateBase::ExtraBehavior::None); | ||
| 195 | } | ||
| 196 | |||
| 197 | void DecodeInterleavedWithPerfOld(Kernel::HLERequestContext& ctx) { | ||
| 198 | LOG_DEBUG(Audio, "called"); | ||
| 199 | |||
| 200 | decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled, | ||
| 201 | OpusDecoderStateBase::ExtraBehavior::None); | ||
| 202 | } | ||
| 203 | |||
| 204 | void DecodeInterleaved(Kernel::HLERequestContext& ctx) { | ||
| 205 | LOG_DEBUG(Audio, "called"); | ||
| 206 | |||
| 207 | IPC::RequestParser rp{ctx}; | ||
| 208 | const auto extra_behavior = rp.Pop<bool>() | ||
| 209 | ? OpusDecoderStateBase::ExtraBehavior::ResetContext | ||
| 210 | : OpusDecoderStateBase::ExtraBehavior::None; | ||
| 211 | |||
| 212 | decoder_state->DecodeInterleaved(ctx, OpusDecoderStateBase::PerfTime::Enabled, | ||
| 213 | extra_behavior); | ||
| 214 | } | ||
| 215 | |||
| 216 | std::unique_ptr<OpusDecoderStateBase> decoder_state; | ||
| 217 | }; | ||
| 218 | |||
| 219 | std::size_t WorkerBufferSize(u32 channel_count) { | ||
| 182 | ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); | 220 | ASSERT_MSG(channel_count == 1 || channel_count == 2, "Invalid channel count"); |
| 183 | return opus_decoder_get_size(static_cast<int>(channel_count)); | 221 | return opus_decoder_get_size(static_cast<int>(channel_count)); |
| 184 | } | 222 | } |
| 223 | } // Anonymous namespace | ||
| 185 | 224 | ||
| 186 | void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { | 225 | void HwOpus::GetWorkBufferSize(Kernel::HLERequestContext& ctx) { |
| 187 | IPC::RequestParser rp{ctx}; | 226 | IPC::RequestParser rp{ctx}; |
| @@ -220,8 +259,7 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { | |||
| 220 | const std::size_t worker_sz = WorkerBufferSize(channel_count); | 259 | const std::size_t worker_sz = WorkerBufferSize(channel_count); |
| 221 | ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); | 260 | ASSERT_MSG(buffer_sz >= worker_sz, "Worker buffer too large"); |
| 222 | 261 | ||
| 223 | std::unique_ptr<OpusDecoder, OpusDeleter> decoder{ | 262 | OpusDecoderPtr decoder{static_cast<OpusDecoder*>(operator new(worker_sz))}; |
| 224 | static_cast<OpusDecoder*>(operator new(worker_sz))}; | ||
| 225 | if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { | 263 | if (const int err = opus_decoder_init(decoder.get(), sample_rate, channel_count)) { |
| 226 | LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err); | 264 | LOG_ERROR(Audio, "Failed to init opus decoder with error={}", err); |
| 227 | IPC::ResponseBuilder rb{ctx, 2}; | 265 | IPC::ResponseBuilder rb{ctx, 2}; |
| @@ -232,8 +270,8 @@ void HwOpus::OpenOpusDecoder(Kernel::HLERequestContext& ctx) { | |||
| 232 | 270 | ||
| 233 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 271 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 234 | rb.Push(RESULT_SUCCESS); | 272 | rb.Push(RESULT_SUCCESS); |
| 235 | rb.PushIpcInterface<IHardwareOpusDecoderManager>(std::move(decoder), sample_rate, | 273 | rb.PushIpcInterface<IHardwareOpusDecoderManager>( |
| 236 | channel_count); | 274 | std::make_unique<OpusDecoderState>(std::move(decoder), sample_rate, channel_count)); |
| 237 | } | 275 | } |
| 238 | 276 | ||
| 239 | HwOpus::HwOpus() : ServiceFramework("hwopus") { | 277 | HwOpus::HwOpus() : ServiceFramework("hwopus") { |
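The hwopus.cpp rewrite separates decoding from IPC dispatch: OpusDecoderStateBase defines the DecodeInterleaved contract, OpusDecoderState implements it for the single-stream case, and IHardwareOpusDecoderManager now only owns a state object and forwards requests to it. One payoff is that the still-unimplemented multistream commands (SetContextForMultiStream, DecodeInterleavedForMultiStream*) could later be served by another state class without touching the manager. A hypothetical sketch, not part of this diff:

    // Hypothetical follow-up (class name assumed; would use libopus' multistream API):
    class OpusMultiStreamDecoderState final : public OpusDecoderStateBase {
    public:
        void DecodeInterleaved(Kernel::HLERequestContext& ctx, PerfTime perf_time,
                               ExtraBehavior extra_behavior) override {
            // would parse an OpusPacketHeader and call opus_multistream_decode() here
        }
    };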
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index dbe7ee6e8..20c7c39aa 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | |||
| 36 | 36 | ||
| 37 | auto& instance = Core::System::GetInstance(); | 37 | auto& instance = Core::System::GetInstance(); |
| 38 | instance.GetPerfStats().EndGameFrame(); | 38 | instance.GetPerfStats().EndGameFrame(); |
| 39 | instance.Renderer().SwapBuffers(framebuffer); | 39 | instance.GPU().SwapBuffers(framebuffer); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | } // namespace Service::Nvidia::Devices | 42 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 466db7ccd..a34b9e753 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 178 | auto& gpu = system_instance.GPU(); | 178 | auto& gpu = system_instance.GPU(); |
| 179 | auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); | 179 | auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); |
| 180 | ASSERT(cpu_addr); | 180 | ASSERT(cpu_addr); |
| 181 | system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); | 181 | gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size); |
| 182 | 182 | ||
| 183 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); | 183 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); |
| 184 | 184 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 0a650f36c..8ce7bc7a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< | |||
| 136 | return 0; | 136 | return 0; |
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | static void PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 140 | if (entries.empty()) { | ||
| 141 | return; | ||
| 142 | } | ||
| 143 | |||
| 144 | auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()}; | ||
| 145 | dma_pusher.Push(std::move(entries)); | ||
| 146 | dma_pusher.DispatchCalls(); | ||
| 147 | } | ||
| 148 | |||
| 149 | u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { | 139 | u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { |
| 150 | if (input.size() < sizeof(IoctlSubmitGpfifo)) { | 140 | if (input.size() < sizeof(IoctlSubmitGpfifo)) { |
| 151 | UNIMPLEMENTED(); | 141 | UNIMPLEMENTED(); |
| @@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 163 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], | 153 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |
| 164 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 154 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 165 | 155 | ||
| 166 | PushGPUEntries(std::move(entries)); | 156 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); |
| 167 | 157 | ||
| 168 | params.fence_out.id = 0; | 158 | params.fence_out.id = 0; |
| 169 | params.fence_out.value = 0; | 159 | params.fence_out.value = 0; |
| @@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 184 | Memory::ReadBlock(params.address, entries.data(), | 174 | Memory::ReadBlock(params.address, entries.data(), |
| 185 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 175 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 186 | 176 | ||
| 187 | PushGPUEntries(std::move(entries)); | 177 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); |
| 188 | 178 | ||
| 189 | params.fence_out.id = 0; | 179 | params.fence_out.id = 0; |
| 190 | params.fence_out.value = 0; | 180 | params.fence_out.value = 0; |
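SubmitGPFIFO and KickoffPB now hand their command lists to GPU::PushGPUEntries instead of a file-local helper. Only the call sites appear in this diff, so the following is an assumption about the new member's shape, mirroring the deleted helper; an asynchronous backend could instead enqueue the work on a GPU thread.

    // Assumed sketch of the new member (member name dma_pusher is a guess):
    void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
        if (entries.empty()) {
            return;
        }
        dma_pusher->Push(std::move(entries));
        dma_pusher->DispatchCalls();
    }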
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 56f31e2ac..fc496b654 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -186,7 +186,7 @@ void NVFlinger::Compose() { | |||
| 186 | 186 | ||
| 187 | // There was no queued buffer to draw, render previous frame | 187 | // There was no queued buffer to draw, render previous frame |
| 188 | system_instance.GetPerfStats().EndGameFrame(); | 188 | system_instance.GetPerfStats().EndGameFrame(); |
| 189 | system_instance.Renderer().SwapBuffers({}); | 189 | system_instance.GPU().SwapBuffers({}); |
| 190 | continue; | 190 | continue; |
| 191 | } | 191 | } |
| 192 | 192 | ||
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index 117f87a45..576fd6407 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp | |||
| @@ -76,7 +76,8 @@ namespace Service { | |||
| 76 | * Creates a function string for logging, complete with the name (or header code, depending | 76 | * Creates a function string for logging, complete with the name (or header code, depending |
| 77 | * on what's passed in) the port name, and all the cmd_buff arguments. | 77 | * on what's passed in) the port name, and all the cmd_buff arguments. |
| 78 | */ | 78 | */ |
| 79 | [[maybe_unused]] static std::string MakeFunctionString(const char* name, const char* port_name, | 79 | [[maybe_unused]] static std::string MakeFunctionString(std::string_view name, |
| 80 | std::string_view port_name, | ||
| 80 | const u32* cmd_buff) { | 81 | const u32* cmd_buff) { |
| 81 | // Number of params == bits 0-5 + bits 6-11 | 82 | // Number of params == bits 0-5 + bits 6-11 |
| 82 | int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F); | 83 | int num_params = (cmd_buff[0] & 0x3F) + ((cmd_buff[0] >> 6) & 0x3F); |
| @@ -158,9 +159,7 @@ void ServiceFrameworkBase::InvokeRequest(Kernel::HLERequestContext& ctx) { | |||
| 158 | return ReportUnimplementedFunction(ctx, info); | 159 | return ReportUnimplementedFunction(ctx, info); |
| 159 | } | 160 | } |
| 160 | 161 | ||
| 161 | LOG_TRACE( | 162 | LOG_TRACE(Service, "{}", MakeFunctionString(info->name, GetServiceName(), ctx.CommandBuffer())); |
| 162 | Service, "{}", | ||
| 163 | MakeFunctionString(info->name, GetServiceName().c_str(), ctx.CommandBuffer()).c_str()); | ||
| 164 | handler_invoker(this, info->handler_callback, ctx); | 163 | handler_invoker(this, info->handler_callback, ctx); |
| 165 | } | 164 | } |
| 166 | 165 | ||
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp index 74da4d5e6..e9ee73710 100644 --- a/src/core/hle/service/sm/controller.cpp +++ b/src/core/hle/service/sm/controller.cpp | |||
| @@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) { | |||
| 30 | 30 | ||
| 31 | IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; | 31 | IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; |
| 32 | rb.Push(RESULT_SUCCESS); | 32 | rb.Push(RESULT_SUCCESS); |
| 33 | Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client}; | 33 | Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client}; |
| 34 | rb.PushMoveObjects(session); | 34 | rb.PushMoveObjects(session); |
| 35 | 35 | ||
| 36 | LOG_DEBUG(Service, "session={}", session->GetObjectId()); | 36 | LOG_DEBUG(Service, "session={}", session->GetObjectId()); |
diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index a975767bb..566cd6006 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include "core/hle/service/nvdrv/nvdrv.h" | 24 | #include "core/hle/service/nvdrv/nvdrv.h" |
| 25 | #include "core/hle/service/nvflinger/buffer_queue.h" | 25 | #include "core/hle/service/nvflinger/buffer_queue.h" |
| 26 | #include "core/hle/service/nvflinger/nvflinger.h" | 26 | #include "core/hle/service/nvflinger/nvflinger.h" |
| 27 | #include "core/hle/service/service.h" | ||
| 27 | #include "core/hle/service/vi/vi.h" | 28 | #include "core/hle/service/vi/vi.h" |
| 28 | #include "core/hle/service/vi/vi_m.h" | 29 | #include "core/hle/service/vi/vi_m.h" |
| 29 | #include "core/hle/service/vi/vi_s.h" | 30 | #include "core/hle/service/vi/vi_s.h" |
| @@ -33,6 +34,7 @@ | |||
| 33 | namespace Service::VI { | 34 | namespace Service::VI { |
| 34 | 35 | ||
| 35 | constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; | 36 | constexpr ResultCode ERR_OPERATION_FAILED{ErrorModule::VI, 1}; |
| 37 | constexpr ResultCode ERR_PERMISSION_DENIED{ErrorModule::VI, 5}; | ||
| 36 | constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; | 38 | constexpr ResultCode ERR_UNSUPPORTED{ErrorModule::VI, 6}; |
| 37 | constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7}; | 39 | constexpr ResultCode ERR_NOT_FOUND{ErrorModule::VI, 7}; |
| 38 | 40 | ||
| @@ -1203,26 +1205,40 @@ IApplicationDisplayService::IApplicationDisplayService( | |||
| 1203 | RegisterHandlers(functions); | 1205 | RegisterHandlers(functions); |
| 1204 | } | 1206 | } |
| 1205 | 1207 | ||
| 1206 | Module::Interface::Interface(std::shared_ptr<Module> module, const char* name, | 1208 | static bool IsValidServiceAccess(Permission permission, Policy policy) { |
| 1207 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) | 1209 | if (permission == Permission::User) { |
| 1208 | : ServiceFramework(name), module(std::move(module)), nv_flinger(std::move(nv_flinger)) {} | 1210 | return policy == Policy::User; |
| 1211 | } | ||
| 1212 | |||
| 1213 | if (permission == Permission::System || permission == Permission::Manager) { | ||
| 1214 | return policy == Policy::User || policy == Policy::Compositor; | ||
| 1215 | } | ||
| 1209 | 1216 | ||
| 1210 | Module::Interface::~Interface() = default; | 1217 | return false; |
| 1218 | } | ||
| 1211 | 1219 | ||
| 1212 | void Module::Interface::GetDisplayService(Kernel::HLERequestContext& ctx) { | 1220 | void detail::GetDisplayServiceImpl(Kernel::HLERequestContext& ctx, |
| 1213 | LOG_WARNING(Service_VI, "(STUBBED) called"); | 1221 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, |
| 1222 | Permission permission) { | ||
| 1223 | IPC::RequestParser rp{ctx}; | ||
| 1224 | const auto policy = rp.PopEnum<Policy>(); | ||
| 1225 | |||
| 1226 | if (!IsValidServiceAccess(permission, policy)) { | ||
| 1227 | IPC::ResponseBuilder rb{ctx, 2}; | ||
| 1228 | rb.Push(ERR_PERMISSION_DENIED); | ||
| 1229 | return; | ||
| 1230 | } | ||
| 1214 | 1231 | ||
| 1215 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; | 1232 | IPC::ResponseBuilder rb{ctx, 2, 0, 1}; |
| 1216 | rb.Push(RESULT_SUCCESS); | 1233 | rb.Push(RESULT_SUCCESS); |
| 1217 | rb.PushIpcInterface<IApplicationDisplayService>(nv_flinger); | 1234 | rb.PushIpcInterface<IApplicationDisplayService>(std::move(nv_flinger)); |
| 1218 | } | 1235 | } |
| 1219 | 1236 | ||
| 1220 | void InstallInterfaces(SM::ServiceManager& service_manager, | 1237 | void InstallInterfaces(SM::ServiceManager& service_manager, |
| 1221 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) { | 1238 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) { |
| 1222 | auto module = std::make_shared<Module>(); | 1239 | std::make_shared<VI_M>(nv_flinger)->InstallAsService(service_manager); |
| 1223 | std::make_shared<VI_M>(module, nv_flinger)->InstallAsService(service_manager); | 1240 | std::make_shared<VI_S>(nv_flinger)->InstallAsService(service_manager); |
| 1224 | std::make_shared<VI_S>(module, nv_flinger)->InstallAsService(service_manager); | 1241 | std::make_shared<VI_U>(nv_flinger)->InstallAsService(service_manager); |
| 1225 | std::make_shared<VI_U>(module, nv_flinger)->InstallAsService(service_manager); | ||
| 1226 | } | 1242 | } |
| 1227 | 1243 | ||
| 1228 | } // namespace Service::VI | 1244 | } // namespace Service::VI |
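GetDisplayService is no longer a stub shared through Module::Interface; each front-end service forwards a Permission level into detail::GetDisplayServiceImpl, which validates it against the Policy popped from the request. The access matrix implied by IsValidServiceAccess, restated for reference:

    // Permission::User                -> Policy::User only
    // Permission::System / ::Manager  -> Policy::User or Policy::Compositor
    // any other combination           -> ERR_PERMISSION_DENIED (ErrorModule::VI, 5)
    //
    // e.g. a client of vi:u (Permission::User) requesting Policy::Compositor now
    // receives ERR_PERMISSION_DENIED instead of an IApplicationDisplayService.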
diff --git a/src/core/hle/service/vi/vi.h b/src/core/hle/service/vi/vi.h index e3963502a..6b66f8b81 100644 --- a/src/core/hle/service/vi/vi.h +++ b/src/core/hle/service/vi/vi.h | |||
| @@ -4,12 +4,21 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "core/hle/service/service.h" | 7 | #include <memory> |
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace Kernel { | ||
| 11 | class HLERequestContext; | ||
| 12 | } | ||
| 8 | 13 | ||
| 9 | namespace Service::NVFlinger { | 14 | namespace Service::NVFlinger { |
| 10 | class NVFlinger; | 15 | class NVFlinger; |
| 11 | } | 16 | } |
| 12 | 17 | ||
| 18 | namespace Service::SM { | ||
| 19 | class ServiceManager; | ||
| 20 | } | ||
| 21 | |||
| 13 | namespace Service::VI { | 22 | namespace Service::VI { |
| 14 | 23 | ||
| 15 | enum class DisplayResolution : u32 { | 24 | enum class DisplayResolution : u32 { |
| @@ -19,22 +28,25 @@ enum class DisplayResolution : u32 { | |||
| 19 | UndockedHeight = 720, | 28 | UndockedHeight = 720, |
| 20 | }; | 29 | }; |
| 21 | 30 | ||
| 22 | class Module final { | 31 | /// Permission level for a particular VI service instance |
| 23 | public: | 32 | enum class Permission { |
| 24 | class Interface : public ServiceFramework<Interface> { | 33 | User, |
| 25 | public: | 34 | System, |
| 26 | explicit Interface(std::shared_ptr<Module> module, const char* name, | 35 | Manager, |
| 27 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); | 36 | }; |
| 28 | ~Interface() override; | ||
| 29 | |||
| 30 | void GetDisplayService(Kernel::HLERequestContext& ctx); | ||
| 31 | 37 | ||
| 32 | protected: | 38 | /// A policy type that may be requested via GetDisplayService and |
| 33 | std::shared_ptr<Module> module; | 39 | /// GetDisplayServiceWithProxyNameExchange |
| 34 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; | 40 | enum class Policy { |
| 35 | }; | 41 | User, |
| 42 | Compositor, | ||
| 36 | }; | 43 | }; |
| 37 | 44 | ||
| 45 | namespace detail { | ||
| 46 | void GetDisplayServiceImpl(Kernel::HLERequestContext& ctx, | ||
| 47 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger, Permission permission); | ||
| 48 | } // namespace detail | ||
| 49 | |||
| 38 | /// Registers all VI services with the specified service manager. | 50 | /// Registers all VI services with the specified service manager. |
| 39 | void InstallInterfaces(SM::ServiceManager& service_manager, | 51 | void InstallInterfaces(SM::ServiceManager& service_manager, |
| 40 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); | 52 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); |
diff --git a/src/core/hle/service/vi/vi_m.cpp b/src/core/hle/service/vi/vi_m.cpp index 207c06b16..06070087f 100644 --- a/src/core/hle/service/vi/vi_m.cpp +++ b/src/core/hle/service/vi/vi_m.cpp | |||
| @@ -2,12 +2,14 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/hle/service/vi/vi.h" | ||
| 5 | #include "core/hle/service/vi/vi_m.h" | 7 | #include "core/hle/service/vi/vi_m.h" |
| 6 | 8 | ||
| 7 | namespace Service::VI { | 9 | namespace Service::VI { |
| 8 | 10 | ||
| 9 | VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) | 11 | VI_M::VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) |
| 10 | : Module::Interface(std::move(module), "vi:m", std::move(nv_flinger)) { | 12 | : ServiceFramework{"vi:m"}, nv_flinger{std::move(nv_flinger)} { |
| 11 | static const FunctionInfo functions[] = { | 13 | static const FunctionInfo functions[] = { |
| 12 | {2, &VI_M::GetDisplayService, "GetDisplayService"}, | 14 | {2, &VI_M::GetDisplayService, "GetDisplayService"}, |
| 13 | {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, | 15 | {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, |
| @@ -17,4 +19,10 @@ VI_M::VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> | |||
| 17 | 19 | ||
| 18 | VI_M::~VI_M() = default; | 20 | VI_M::~VI_M() = default; |
| 19 | 21 | ||
| 22 | void VI_M::GetDisplayService(Kernel::HLERequestContext& ctx) { | ||
| 23 | LOG_DEBUG(Service_VI, "called"); | ||
| 24 | |||
| 25 | detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::Manager); | ||
| 26 | } | ||
| 27 | |||
| 20 | } // namespace Service::VI | 28 | } // namespace Service::VI |
diff --git a/src/core/hle/service/vi/vi_m.h b/src/core/hle/service/vi/vi_m.h index 487d58d50..290e06689 100644 --- a/src/core/hle/service/vi/vi_m.h +++ b/src/core/hle/service/vi/vi_m.h | |||
| @@ -4,14 +4,27 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "core/hle/service/vi/vi.h" | 7 | #include "core/hle/service/service.h" |
| 8 | |||
| 9 | namespace Kernel { | ||
| 10 | class HLERequestContext; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Service::NVFlinger { | ||
| 14 | class NVFlinger; | ||
| 15 | } | ||
| 8 | 16 | ||
| 9 | namespace Service::VI { | 17 | namespace Service::VI { |
| 10 | 18 | ||
| 11 | class VI_M final : public Module::Interface { | 19 | class VI_M final : public ServiceFramework<VI_M> { |
| 12 | public: | 20 | public: |
| 13 | explicit VI_M(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); | 21 | explicit VI_M(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); |
| 14 | ~VI_M() override; | 22 | ~VI_M() override; |
| 23 | |||
| 24 | private: | ||
| 25 | void GetDisplayService(Kernel::HLERequestContext& ctx); | ||
| 26 | |||
| 27 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; | ||
| 15 | }; | 28 | }; |
| 16 | 29 | ||
| 17 | } // namespace Service::VI | 30 | } // namespace Service::VI |
diff --git a/src/core/hle/service/vi/vi_s.cpp b/src/core/hle/service/vi/vi_s.cpp index 920e6a1f6..57c596cc4 100644 --- a/src/core/hle/service/vi/vi_s.cpp +++ b/src/core/hle/service/vi/vi_s.cpp | |||
| @@ -2,12 +2,14 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/hle/service/vi/vi.h" | ||
| 5 | #include "core/hle/service/vi/vi_s.h" | 7 | #include "core/hle/service/vi/vi_s.h" |
| 6 | 8 | ||
| 7 | namespace Service::VI { | 9 | namespace Service::VI { |
| 8 | 10 | ||
| 9 | VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) | 11 | VI_S::VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) |
| 10 | : Module::Interface(std::move(module), "vi:s", std::move(nv_flinger)) { | 12 | : ServiceFramework{"vi:s"}, nv_flinger{std::move(nv_flinger)} { |
| 11 | static const FunctionInfo functions[] = { | 13 | static const FunctionInfo functions[] = { |
| 12 | {1, &VI_S::GetDisplayService, "GetDisplayService"}, | 14 | {1, &VI_S::GetDisplayService, "GetDisplayService"}, |
| 13 | {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, | 15 | {3, nullptr, "GetDisplayServiceWithProxyNameExchange"}, |
| @@ -17,4 +19,10 @@ VI_S::VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> | |||
| 17 | 19 | ||
| 18 | VI_S::~VI_S() = default; | 20 | VI_S::~VI_S() = default; |
| 19 | 21 | ||
| 22 | void VI_S::GetDisplayService(Kernel::HLERequestContext& ctx) { | ||
| 23 | LOG_DEBUG(Service_VI, "called"); | ||
| 24 | |||
| 25 | detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::System); | ||
| 26 | } | ||
| 27 | |||
| 20 | } // namespace Service::VI | 28 | } // namespace Service::VI |
diff --git a/src/core/hle/service/vi/vi_s.h b/src/core/hle/service/vi/vi_s.h index bbc31148f..47804dc0b 100644 --- a/src/core/hle/service/vi/vi_s.h +++ b/src/core/hle/service/vi/vi_s.h | |||
| @@ -4,14 +4,27 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "core/hle/service/vi/vi.h" | 7 | #include "core/hle/service/service.h" |
| 8 | |||
| 9 | namespace Kernel { | ||
| 10 | class HLERequestContext; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Service::NVFlinger { | ||
| 14 | class NVFlinger; | ||
| 15 | } | ||
| 8 | 16 | ||
| 9 | namespace Service::VI { | 17 | namespace Service::VI { |
| 10 | 18 | ||
| 11 | class VI_S final : public Module::Interface { | 19 | class VI_S final : public ServiceFramework<VI_S> { |
| 12 | public: | 20 | public: |
| 13 | explicit VI_S(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); | 21 | explicit VI_S(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); |
| 14 | ~VI_S() override; | 22 | ~VI_S() override; |
| 23 | |||
| 24 | private: | ||
| 25 | void GetDisplayService(Kernel::HLERequestContext& ctx); | ||
| 26 | |||
| 27 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; | ||
| 15 | }; | 28 | }; |
| 16 | 29 | ||
| 17 | } // namespace Service::VI | 30 | } // namespace Service::VI |
diff --git a/src/core/hle/service/vi/vi_u.cpp b/src/core/hle/service/vi/vi_u.cpp index d81e410d6..9d5ceb608 100644 --- a/src/core/hle/service/vi/vi_u.cpp +++ b/src/core/hle/service/vi/vi_u.cpp | |||
| @@ -2,12 +2,14 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | ||
| 6 | #include "core/hle/service/vi/vi.h" | ||
| 5 | #include "core/hle/service/vi/vi_u.h" | 7 | #include "core/hle/service/vi/vi_u.h" |
| 6 | 8 | ||
| 7 | namespace Service::VI { | 9 | namespace Service::VI { |
| 8 | 10 | ||
| 9 | VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) | 11 | VI_U::VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger) |
| 10 | : Module::Interface(std::move(module), "vi:u", std::move(nv_flinger)) { | 12 | : ServiceFramework{"vi:u"}, nv_flinger{std::move(nv_flinger)} { |
| 11 | static const FunctionInfo functions[] = { | 13 | static const FunctionInfo functions[] = { |
| 12 | {0, &VI_U::GetDisplayService, "GetDisplayService"}, | 14 | {0, &VI_U::GetDisplayService, "GetDisplayService"}, |
| 13 | }; | 15 | }; |
| @@ -16,4 +18,10 @@ VI_U::VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> | |||
| 16 | 18 | ||
| 17 | VI_U::~VI_U() = default; | 19 | VI_U::~VI_U() = default; |
| 18 | 20 | ||
| 21 | void VI_U::GetDisplayService(Kernel::HLERequestContext& ctx) { | ||
| 22 | LOG_DEBUG(Service_VI, "called"); | ||
| 23 | |||
| 24 | detail::GetDisplayServiceImpl(ctx, nv_flinger, Permission::User); | ||
| 25 | } | ||
| 26 | |||
| 19 | } // namespace Service::VI | 27 | } // namespace Service::VI |
diff --git a/src/core/hle/service/vi/vi_u.h b/src/core/hle/service/vi/vi_u.h index b92f28c92..19bdb73b0 100644 --- a/src/core/hle/service/vi/vi_u.h +++ b/src/core/hle/service/vi/vi_u.h | |||
| @@ -4,14 +4,27 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "core/hle/service/vi/vi.h" | 7 | #include "core/hle/service/service.h" |
| 8 | |||
| 9 | namespace Kernel { | ||
| 10 | class HLERequestContext; | ||
| 11 | } | ||
| 12 | |||
| 13 | namespace Service::NVFlinger { | ||
| 14 | class NVFlinger; | ||
| 15 | } | ||
| 8 | 16 | ||
| 9 | namespace Service::VI { | 17 | namespace Service::VI { |
| 10 | 18 | ||
| 11 | class VI_U final : public Module::Interface { | 19 | class VI_U final : public ServiceFramework<VI_U> { |
| 12 | public: | 20 | public: |
| 13 | explicit VI_U(std::shared_ptr<Module> module, std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); | 21 | explicit VI_U(std::shared_ptr<NVFlinger::NVFlinger> nv_flinger); |
| 14 | ~VI_U() override; | 22 | ~VI_U() override; |
| 23 | |||
| 24 | private: | ||
| 25 | void GetDisplayService(Kernel::HLERequestContext& ctx); | ||
| 26 | |||
| 27 | std::shared_ptr<NVFlinger::NVFlinger> nv_flinger; | ||
| 15 | }; | 28 | }; |
| 16 | 29 | ||
| 17 | } // namespace Service::VI | 30 | } // namespace Service::VI |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f809567b6..6591c45d2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -171,9 +171,6 @@ T Read(const VAddr vaddr) { | |||
| 171 | return value; | 171 | return value; |
| 172 | } | 172 | } |
| 173 | 173 | ||
| 174 | // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state | ||
| 175 | std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); | ||
| 176 | |||
| 177 | PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; | 174 | PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; |
| 178 | switch (type) { | 175 | switch (type) { |
| 179 | case PageType::Unmapped: | 176 | case PageType::Unmapped: |
| @@ -204,9 +201,6 @@ void Write(const VAddr vaddr, const T data) { | |||
| 204 | return; | 201 | return; |
| 205 | } | 202 | } |
| 206 | 203 | ||
| 207 | // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state | ||
| 208 | std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); | ||
| 209 | |||
| 210 | PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; | 204 | PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; |
| 211 | switch (type) { | 205 | switch (type) { |
| 212 | case PageType::Unmapped: | 206 | case PageType::Unmapped: |
| @@ -362,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { | |||
| 362 | const VAddr overlap_end = std::min(end, region_end); | 356 | const VAddr overlap_end = std::min(end, region_end); |
| 363 | const VAddr overlap_size = overlap_end - overlap_start; | 357 | const VAddr overlap_size = overlap_end - overlap_start; |
| 364 | 358 | ||
| 365 | auto& rasterizer = system_instance.Renderer().Rasterizer(); | 359 | auto& gpu = system_instance.GPU(); |
| 366 | switch (mode) { | 360 | switch (mode) { |
| 367 | case FlushMode::Flush: | 361 | case FlushMode::Flush: |
| 368 | rasterizer.FlushRegion(overlap_start, overlap_size); | 362 | gpu.FlushRegion(overlap_start, overlap_size); |
| 369 | break; | 363 | break; |
| 370 | case FlushMode::Invalidate: | 364 | case FlushMode::Invalidate: |
| 371 | rasterizer.InvalidateRegion(overlap_start, overlap_size); | 365 | gpu.InvalidateRegion(overlap_start, overlap_size); |
| 372 | break; | 366 | break; |
| 373 | case FlushMode::FlushAndInvalidate: | 367 | case FlushMode::FlushAndInvalidate: |
| 374 | rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); | 368 | gpu.FlushAndInvalidateRegion(overlap_start, overlap_size); |
| 375 | break; | 369 | break; |
| 376 | } | 370 | } |
| 377 | }; | 371 | }; |
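RasterizerFlushVirtualRegion, like the nvdrv and engine call sites earlier in this diff, now goes through the GPU object rather than reaching into Renderer().Rasterizer() directly. Only the call sites are visible here, so the shape below is an assumption with signatures guessed from those call sites; a synchronous backend would presumably forward straight to the rasterizer as the old code did, while an asynchronous backend could defer the work to the GPU thread.

    // Assumed forwarding in a synchronous GPU backend:
    void GPU::FlushRegion(VAddr addr, u64 size) {
        renderer.Rasterizer().FlushRegion(addr, size);
    }
    void GPU::InvalidateRegion(VAddr addr, u64 size) {
        renderer.Rasterizer().InvalidateRegion(addr, size);
    }
    void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
        renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
    }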
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 2e232e1e7..6dd3139cc 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -91,7 +91,10 @@ void LogSettings() { | |||
| 91 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); | 91 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); |
| 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
| 93 | LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); | 93 | LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); |
| 94 | LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); | ||
| 94 | LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); | 95 | LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); |
| 96 | LogSetting("Renderer_UseAsynchronousGpuEmulation", | ||
| 97 | Settings::values.use_asynchronous_gpu_emulation); | ||
| 95 | LogSetting("Audio_OutputEngine", Settings::values.sink_id); | 98 | LogSetting("Audio_OutputEngine", Settings::values.sink_id); |
| 96 | LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); | 99 | LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); |
| 97 | LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); | 100 | LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); |
diff --git a/src/core/settings.h b/src/core/settings.h index 7e76e0466..cdfb2f742 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -393,6 +393,7 @@ struct Values { | |||
| 393 | u16 frame_limit; | 393 | u16 frame_limit; |
| 394 | bool use_disk_shader_cache; | 394 | bool use_disk_shader_cache; |
| 395 | bool use_accurate_gpu_emulation; | 395 | bool use_accurate_gpu_emulation; |
| 396 | bool use_asynchronous_gpu_emulation; | ||
| 396 | 397 | ||
| 397 | float bg_red; | 398 | float bg_red; |
| 398 | float bg_green; | 399 | float bg_green; |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 58dfcc4df..e1db06811 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() { | |||
| 162 | Settings::values.use_disk_shader_cache); | 162 | Settings::values.use_disk_shader_cache); |
| 163 | AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", | 163 | AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", |
| 164 | Settings::values.use_accurate_gpu_emulation); | 164 | Settings::values.use_accurate_gpu_emulation); |
| 165 | AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation", | ||
| 166 | Settings::values.use_asynchronous_gpu_emulation); | ||
| 165 | AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", | 167 | AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", |
| 166 | Settings::values.use_docked_mode); | 168 | Settings::values.use_docked_mode); |
| 167 | } | 169 | } |
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index 9b8a44fa1..6fe56833d 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp | |||
| @@ -13,11 +13,11 @@ | |||
| 13 | namespace ArmTests { | 13 | namespace ArmTests { |
| 14 | 14 | ||
| 15 | TestEnvironment::TestEnvironment(bool mutable_memory_) | 15 | TestEnvironment::TestEnvironment(bool mutable_memory_) |
| 16 | : mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) { | 16 | : mutable_memory(mutable_memory_), |
| 17 | 17 | test_memory(std::make_shared<TestMemory>(this)), kernel{Core::System::GetInstance()} { | |
| 18 | auto process = Kernel::Process::Create(kernel, ""); | 18 | auto process = Kernel::Process::Create(Core::System::GetInstance(), ""); |
| 19 | kernel.MakeCurrentProcess(process.get()); | 19 | kernel.MakeCurrentProcess(process.get()); |
| 20 | page_table = &Core::CurrentProcess()->VMManager().page_table; | 20 | page_table = &process->VMManager().page_table; |
| 21 | 21 | ||
| 22 | std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); | 22 | std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); |
| 23 | page_table->special_regions.clear(); | 23 | page_table->special_regions.clear(); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3e9d2b3be..0c3038c52 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -17,6 +17,12 @@ add_library(video_core STATIC | |||
| 17 | engines/shader_header.h | 17 | engines/shader_header.h |
| 18 | gpu.cpp | 18 | gpu.cpp |
| 19 | gpu.h | 19 | gpu.h |
| 20 | gpu_asynch.cpp | ||
| 21 | gpu_asynch.h | ||
| 22 | gpu_synch.cpp | ||
| 23 | gpu_synch.h | ||
| 24 | gpu_thread.cpp | ||
| 25 | gpu_thread.h | ||
| 20 | macro_interpreter.cpp | 26 | macro_interpreter.cpp |
| 21 | macro_interpreter.h | 27 | macro_interpreter.h |
| 22 | memory_manager.cpp | 28 | memory_manager.cpp |
| @@ -74,6 +80,7 @@ add_library(video_core STATIC | |||
| 74 | shader/decode/hfma2.cpp | 80 | shader/decode/hfma2.cpp |
| 75 | shader/decode/conversion.cpp | 81 | shader/decode/conversion.cpp |
| 76 | shader/decode/memory.cpp | 82 | shader/decode/memory.cpp |
| 83 | shader/decode/texture.cpp | ||
| 77 | shader/decode/float_set_predicate.cpp | 84 | shader/decode/float_set_predicate.cpp |
| 78 | shader/decode/integer_set_predicate.cpp | 85 | shader/decode/integer_set_predicate.cpp |
| 79 | shader/decode/half_set_predicate.cpp | 86 | shader/decode/half_set_predicate.cpp |
| @@ -94,6 +101,8 @@ add_library(video_core STATIC | |||
| 94 | surface.h | 101 | surface.h |
| 95 | textures/astc.cpp | 102 | textures/astc.cpp |
| 96 | textures/astc.h | 103 | textures/astc.h |
| 104 | textures/convert.cpp | ||
| 105 | textures/convert.h | ||
| 97 | textures/decoders.cpp | 106 | textures/decoders.cpp |
| 98 | textures/decoders.h | 107 | textures/decoders.h |
| 99 | textures/texture.h | 108 | textures/texture.h |
| @@ -104,6 +113,8 @@ add_library(video_core STATIC | |||
| 104 | if (ENABLE_VULKAN) | 113 | if (ENABLE_VULKAN) |
| 105 | target_sources(video_core PRIVATE | 114 | target_sources(video_core PRIVATE |
| 106 | renderer_vulkan/declarations.h | 115 | renderer_vulkan/declarations.h |
| 116 | renderer_vulkan/maxwell_to_vk.cpp | ||
| 117 | renderer_vulkan/maxwell_to_vk.h | ||
| 107 | renderer_vulkan/vk_buffer_cache.cpp | 118 | renderer_vulkan/vk_buffer_cache.cpp |
| 108 | renderer_vulkan/vk_buffer_cache.h | 119 | renderer_vulkan/vk_buffer_cache.h |
| 109 | renderer_vulkan/vk_device.cpp | 120 | renderer_vulkan/vk_device.cpp |
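The new gpu_asynch/gpu_synch/gpu_thread sources pair with the use_asynchronous_gpu_emulation setting added in settings.h above. The wiring is not shown in this section, so the following is only a guess at how the backend might be selected; the class and namespace names are assumptions, not taken from this diff.

    // Hypothetical backend selection driven by the new setting:
    std::unique_ptr<Tegra::GPU> gpu;
    if (Settings::values.use_asynchronous_gpu_emulation) {
        gpu = std::make_unique<VideoCommon::GPUAsynch>(system, renderer);
    } else {
        gpu = std::make_unique<VideoCommon::GPUSynch>(system, renderer);
    }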
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 669541b4b..bff1a37ff 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -39,7 +39,7 @@ bool DmaPusher::Step() { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | const CommandList& command_list{dma_pushbuffer.front()}; | 41 | const CommandList& command_list{dma_pushbuffer.front()}; |
| 42 | const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; | 42 | const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; |
| 43 | GPUVAddr dma_get = command_list_header.addr; | 43 | GPUVAddr dma_get = command_list_header.addr; |
| 44 | GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); | 44 | GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); |
| 45 | bool non_main = command_list_header.is_non_main; | 45 | bool non_main = command_list_header.is_non_main; |
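The dma_pusher.cpp change copies the CommandListHeader by value instead of binding a const reference into dma_pushbuffer. A plausible motivation, not stated in the commit, is that the header is a small bit-field struct and a copy remains valid even if the pushbuffer container is modified while the commands it describes are dispatched.

    // Copy, not a reference into the queued command list:
    const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]};
    GPUVAddr dma_get = command_list_header.addr;
    GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32);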
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 540dcc52c..03b7ee5d8 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -2,12 +2,11 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/core.h" | 5 | #include "common/assert.h" |
| 6 | #include "core/memory.h" | 6 | #include "common/logging/log.h" |
| 7 | #include "common/math_util.h" | ||
| 7 | #include "video_core/engines/fermi_2d.h" | 8 | #include "video_core/engines/fermi_2d.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/rasterizer_interface.h" | 9 | #include "video_core/rasterizer_interface.h" |
| 10 | #include "video_core/textures/decoders.h" | ||
| 11 | 10 | ||
| 12 | namespace Tegra::Engines { | 11 | namespace Tegra::Engines { |
| 13 | 12 | ||
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index c69f74cc5..80523e320 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include "common/assert.h" | 8 | #include <cstddef> |
| 9 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 10 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 4ca856b6b..b1d950460 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -2,9 +2,8 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | ||
| 5 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 6 | #include "core/core.h" | ||
| 7 | #include "core/memory.h" | ||
| 8 | #include "video_core/engines/kepler_compute.h" | 7 | #include "video_core/engines/kepler_compute.h" |
| 9 | #include "video_core/memory_manager.h" | 8 | #include "video_core/memory_manager.h" |
| 10 | 9 | ||
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index df0a32e0f..6575afd0f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -5,8 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include "common/assert.h" | 8 | #include <cstddef> |
| 9 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_funcs.h" | 9 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 12 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 4f6126116..aae2a4019 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 48 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. | 48 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. |
| 49 | // We do this before actually writing the new data because the destination address might contain | 49 | // We do this before actually writing the new data because the destination address might contain |
| 50 | // a dirty surface that will have to be written back to memory. | 50 | // a dirty surface that will have to be written back to memory. |
| 51 | rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); | 51 | Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); |
| 52 | 52 | ||
| 53 | Memory::Write32(*dest_address, data); | 53 | Memory::Write32(*dest_address, data); |
| 54 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 54 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index f680c2ad9..9181e9d80 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | ||
| 8 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 9 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 529a14ec7..9dfea5999 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/logging/log.h" | ||
| 6 | #include "core/core.h" | 7 | #include "core/core.h" |
| 7 | #include "core/memory.h" | 8 | #include "core/memory.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| @@ -91,12 +92,12 @@ void MaxwellDMA::HandleCopy() { | |||
| 91 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | 92 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { |
| 92 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated | 93 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated |
| 93 | // copying. | 94 | // copying. |
| 94 | rasterizer.FlushRegion(*source_cpu, src_size); | 95 | Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); |
| 95 | 96 | ||
| 96 | // We have to invalidate the destination region to evict any outdated surfaces from the | 97 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 97 | // cache. We do this before actually writing the new data because the destination address | 98 | // cache. We do this before actually writing the new data because the destination address |
| 98 | // might contain a dirty surface that will have to be written back to memory. | 99 | // might contain a dirty surface that will have to be written back to memory. |
| 99 | rasterizer.InvalidateRegion(*dest_cpu, dst_size); | 100 | Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); |
| 100 | }; | 101 | }; |
| 101 | 102 | ||
| 102 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 103 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
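The DMA fallback pairs the two cache operations: flush the source so guest memory holds the latest bytes before they are read, then invalidate the destination so stale cached surfaces cannot later overwrite the copy. A condensed sketch with hypothetical helpers; the real path also handles tiled/linear layout conversion, which is omitted here:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Hypothetical cache interface; only the two operations used by the copy path.
    struct Cache {
        void FlushRegion(std::uint64_t addr, std::uint64_t size) {}      // write cached data back
        void InvalidateRegion(std::uint64_t addr, std::uint64_t size) {} // drop cached data
    };

    // CPU-side fallback for a GPU DMA copy, mirroring the flush/invalidate ordering above.
    void CopyLinear(Cache& cache, std::uint8_t* ram, std::uint64_t src, std::uint64_t dst,
                    std::uint64_t size) {
        cache.FlushRegion(src, size);      // guest memory must hold the latest source bytes
        cache.InvalidateRegion(dst, size); // evict stale surfaces covering the destination
        std::memmove(ram + dst, ram + src, size);
    }

    int main() {
        Cache cache;
        std::vector<std::uint8_t> ram(0x10000);
        CopyLinear(cache, ram.data(), 0x1000, 0x2000, 0x100);
    }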
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index cf75aeb12..34c369320 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstddef> | ||
| 8 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 9 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 252592edd..7f613370b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include <bitset> | 7 | #include <bitset> |
| 8 | #include <optional> | 8 | #include <optional> |
| 9 | #include <string> | ||
| 10 | #include <tuple> | 9 | #include <tuple> |
| 11 | #include <vector> | 10 | #include <vector> |
| 12 | 11 | ||
| @@ -325,11 +324,11 @@ enum class TextureQueryType : u64 { | |||
| 325 | 324 | ||
| 326 | enum class TextureProcessMode : u64 { | 325 | enum class TextureProcessMode : u64 { |
| 327 | None = 0, | 326 | None = 0, |
| 328 | LZ = 1, // Unknown, appears to be the same as none. | 327 | LZ = 1, // Load LOD of zero. |
| 329 | LB = 2, // Load Bias. | 328 | LB = 2, // Load Bias. |
| 330 | LL = 3, // Load LOD (LevelOfDetail) | 329 | LL = 3, // Load LOD. |
| 331 | LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB | 330 | LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. |
| 332 | LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL | 331 | LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. |
| 333 | }; | 332 | }; |
| 334 | 333 | ||
| 335 | enum class TextureMiscMode : u64 { | 334 | enum class TextureMiscMode : u64 { |
| @@ -1446,6 +1445,7 @@ public: | |||
| 1446 | Flow, | 1445 | Flow, |
| 1447 | Synch, | 1446 | Synch, |
| 1448 | Memory, | 1447 | Memory, |
| 1448 | Texture, | ||
| 1449 | FloatSet, | 1449 | FloatSet, |
| 1450 | FloatSetPredicate, | 1450 | FloatSetPredicate, |
| 1451 | IntegerSet, | 1451 | IntegerSet, |
| @@ -1576,14 +1576,14 @@ private: | |||
| 1576 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | 1576 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), |
| 1577 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), | 1577 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), |
| 1578 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | 1578 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), |
| 1579 | INST("110000----111---", Id::TEX, Type::Memory, "TEX"), | 1579 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), |
| 1580 | INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), | 1580 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), |
| 1581 | INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), | 1581 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), |
| 1582 | INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), | 1582 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1583 | INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), | 1583 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1584 | INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), | 1584 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1585 | INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), | 1585 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1586 | INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), | 1586 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1587 | INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), | 1587 | INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), |
| 1588 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1588 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1589 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1589 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ac30d1a89..08abf8ac9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/maxwell_dma.h" | 13 | #include "video_core/engines/maxwell_dma.h" |
| 14 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 15 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/renderer_base.h" |
| 16 | 16 | ||
| 17 | namespace Tegra { | 17 | namespace Tegra { |
| 18 | 18 | ||
| @@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | |||
| 28 | UNREACHABLE(); | 28 | UNREACHABLE(); |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { | 31 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { |
| 32 | auto& rasterizer{renderer.Rasterizer()}; | ||
| 32 | memory_manager = std::make_unique<Tegra::MemoryManager>(); | 33 | memory_manager = std::make_unique<Tegra::MemoryManager>(); |
| 33 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 34 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 34 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 35 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 6313702f2..56a203275 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -16,8 +16,8 @@ class System; | |||
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | namespace VideoCore { | 18 | namespace VideoCore { |
| 19 | class RasterizerInterface; | 19 | class RendererBase; |
| 20 | } | 20 | } // namespace VideoCore |
| 21 | 21 | ||
| 22 | namespace Tegra { | 22 | namespace Tegra { |
| 23 | 23 | ||
| @@ -119,10 +119,11 @@ enum class EngineID { | |||
| 119 | MAXWELL_DMA_COPY_A = 0xB0B5, | 119 | MAXWELL_DMA_COPY_A = 0xB0B5, |
| 120 | }; | 120 | }; |
| 121 | 121 | ||
| 122 | class GPU final { | 122 | class GPU { |
| 123 | public: | 123 | public: |
| 124 | explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); | 124 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); |
| 125 | ~GPU(); | 125 | |
| 126 | virtual ~GPU(); | ||
| 126 | 127 | ||
| 127 | struct MethodCall { | 128 | struct MethodCall { |
| 128 | u32 method{}; | 129 | u32 method{}; |
| @@ -200,8 +201,42 @@ public: | |||
| 200 | }; | 201 | }; |
| 201 | } regs{}; | 202 | } regs{}; |
| 202 | 203 | ||
| 204 | /// Push GPU command entries to be processed | ||
| 205 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; | ||
| 206 | |||
| 207 | /// Swap buffers (render frame) | ||
| 208 | virtual void SwapBuffers( | ||
| 209 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; | ||
| 210 | |||
| 211 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | ||
| 212 | virtual void FlushRegion(VAddr addr, u64 size) = 0; | ||
| 213 | |||
| 214 | /// Notify rasterizer that any caches of the specified region should be invalidated | ||
| 215 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | ||
| 216 | |||
| 217 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||
| 218 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | ||
| 219 | |||
| 203 | private: | 220 | private: |
| 221 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 222 | void ProcessSemaphoreTriggerMethod(); | ||
| 223 | void ProcessSemaphoreRelease(); | ||
| 224 | void ProcessSemaphoreAcquire(); | ||
| 225 | |||
| 226 | /// Calls a GPU puller method. | ||
| 227 | void CallPullerMethod(const MethodCall& method_call); | ||
| 228 | |||
| 229 | /// Calls a GPU engine method. | ||
| 230 | void CallEngineMethod(const MethodCall& method_call); | ||
| 231 | |||
| 232 | /// Determines where the method should be executed. | ||
| 233 | bool ExecuteMethodOnEngine(const MethodCall& method_call); | ||
| 234 | |||
| 235 | protected: | ||
| 204 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 236 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 237 | VideoCore::RendererBase& renderer; | ||
| 238 | |||
| 239 | private: | ||
| 205 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 240 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
| 206 | 241 | ||
| 207 | /// Mapping of command subchannels to their bound engine ids. | 242 | /// Mapping of command subchannels to their bound engine ids. |
| @@ -217,18 +252,6 @@ private: | |||
| 217 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 252 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 218 | /// Inline memory engine | 253 | /// Inline memory engine |
| 219 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 254 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 220 | |||
| 221 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 222 | void ProcessSemaphoreTriggerMethod(); | ||
| 223 | void ProcessSemaphoreRelease(); | ||
| 224 | void ProcessSemaphoreAcquire(); | ||
| 225 | |||
| 226 | // Calls a GPU puller method. | ||
| 227 | void CallPullerMethod(const MethodCall& method_call); | ||
| 228 | // Calls a GPU engine method. | ||
| 229 | void CallEngineMethod(const MethodCall& method_call); | ||
| 230 | // Determines where the method should be executed. | ||
| 231 | bool ExecuteMethodOnEngine(const MethodCall& method_call); | ||
| 232 | }; | 255 | }; |
| 233 | 256 | ||
| 234 | #define ASSERT_REG_POSITION(field_name, position) \ | 257 | #define ASSERT_REG_POSITION(field_name, position) \ |
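With this change GPU becomes an abstract interface: the submission and region entry points are pure virtual, while the DMA pusher and renderer move to a protected section so derived backends can reach them. A minimal sketch of the same pattern, with illustrative names rather than the real yuzu classes, selecting a synchronous or asynchronous backend once at construction:

    #include <cstdint>
    #include <iostream>
    #include <memory>

    class GpuBackend {
    public:
        virtual ~GpuBackend() = default;
        virtual void FlushRegion(std::uint64_t addr, std::uint64_t size) = 0;
        virtual void InvalidateRegion(std::uint64_t addr, std::uint64_t size) = 0;
    };

    class SyncBackend final : public GpuBackend {
    public:
        void FlushRegion(std::uint64_t addr, std::uint64_t size) override {
            std::cout << std::hex << "flush 0x" << addr << "+0x" << size << " on caller thread\n";
        }
        void InvalidateRegion(std::uint64_t addr, std::uint64_t size) override {
            std::cout << std::hex << "invalidate 0x" << addr << "+0x" << size << " on caller thread\n";
        }
    };

    class AsyncBackend final : public GpuBackend {
    public:
        void FlushRegion(std::uint64_t addr, std::uint64_t size) override {
            std::cout << std::hex << "queue flush 0x" << addr << "+0x" << size << " for GPU thread\n";
        }
        void InvalidateRegion(std::uint64_t addr, std::uint64_t size) override {
            std::cout << std::hex << "queue invalidate 0x" << addr << "+0x" << size << " for GPU thread\n";
        }
    };

    // Mirrors how a configuration flag would choose between the two backends.
    std::unique_ptr<GpuBackend> MakeGpu(bool use_asynchronous_gpu) {
        if (use_asynchronous_gpu) {
            return std::make_unique<AsyncBackend>();
        }
        return std::make_unique<SyncBackend>();
    }

    int main() {
        const auto gpu = MakeGpu(true);
        gpu->InvalidateRegion(0x1000, 0x100);
    }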
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp new file mode 100644 index 000000000..ad0a747e3 --- /dev/null +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/gpu_asynch.h" | ||
| 6 | #include "video_core/gpu_thread.h" | ||
| 7 | #include "video_core/renderer_base.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) | ||
| 12 | : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {} | ||
| 13 | |||
| 14 | GPUAsynch::~GPUAsynch() = default; | ||
| 15 | |||
| 16 | void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 17 | gpu_thread.SubmitList(std::move(entries)); | ||
| 18 | } | ||
| 19 | |||
| 20 | void GPUAsynch::SwapBuffers( | ||
| 21 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||
| 22 | gpu_thread.SwapBuffers(std::move(framebuffer)); | ||
| 23 | } | ||
| 24 | |||
| 25 | void GPUAsynch::FlushRegion(VAddr addr, u64 size) { | ||
| 26 | gpu_thread.FlushRegion(addr, size); | ||
| 27 | } | ||
| 28 | |||
| 29 | void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { | ||
| 30 | gpu_thread.InvalidateRegion(addr, size); | ||
| 31 | } | ||
| 32 | |||
| 33 | void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 34 | gpu_thread.FlushAndInvalidateRegion(addr, size); | ||
| 35 | } | ||
| 36 | |||
| 37 | } // namespace VideoCommon | ||
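One signature detail worth noting: SwapBuffers takes std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>>, i.e. "maybe a reference", since std::optional cannot hold a plain reference. A small sketch of how such a parameter is passed and consumed, with a std::string standing in for the framebuffer config:

    #include <functional>
    #include <iostream>
    #include <optional>
    #include <string>

    void Present(std::optional<std::reference_wrapper<const std::string>> framebuffer) {
        if (!framebuffer) {
            std::cout << "nothing to present\n";
            return;
        }
        // .get() unwraps the reference_wrapper; no copy of the config is made.
        std::cout << "presenting " << framebuffer->get() << '\n';
    }

    int main() {
        const std::string config = "1280x720 ABGR8";
        Present(config);       // binds a reference to config, no copy
        Present(std::nullopt); // no framebuffer this frame
    }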
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h new file mode 100644 index 000000000..e6a807aba --- /dev/null +++ b/src/video_core/gpu_asynch.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/gpu.h" | ||
| 8 | #include "video_core/gpu_thread.h" | ||
| 9 | |||
| 10 | namespace VideoCore { | ||
| 11 | class RendererBase; | ||
| 12 | } // namespace VideoCore | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | namespace GPUThread { | ||
| 17 | class ThreadManager; | ||
| 18 | } // namespace GPUThread | ||
| 19 | |||
| 20 | /// Implementation of GPU interface that runs the GPU asynchronously | ||
| 21 | class GPUAsynch : public Tegra::GPU { | ||
| 22 | public: | ||
| 23 | explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); | ||
| 24 | ~GPUAsynch() override; | ||
| 25 | |||
| 26 | void PushGPUEntries(Tegra::CommandList&& entries) override; | ||
| 27 | void SwapBuffers( | ||
| 28 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 29 | void FlushRegion(VAddr addr, u64 size) override; | ||
| 30 | void InvalidateRegion(VAddr addr, u64 size) override; | ||
| 31 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||
| 32 | |||
| 33 | private: | ||
| 34 | GPUThread::ThreadManager gpu_thread; | ||
| 35 | }; | ||
| 36 | |||
| 37 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp new file mode 100644 index 000000000..4c00b96c7 --- /dev/null +++ b/src/video_core/gpu_synch.cpp | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/gpu_synch.h" | ||
| 6 | #include "video_core/renderer_base.h" | ||
| 7 | |||
| 8 | namespace VideoCommon { | ||
| 9 | |||
| 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) | ||
| 11 | : Tegra::GPU(system, renderer) {} | ||
| 12 | |||
| 13 | GPUSynch::~GPUSynch() = default; | ||
| 14 | |||
| 15 | void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 16 | dma_pusher->Push(std::move(entries)); | ||
| 17 | dma_pusher->DispatchCalls(); | ||
| 18 | } | ||
| 19 | |||
| 20 | void GPUSynch::SwapBuffers( | ||
| 21 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||
| 22 | renderer.SwapBuffers(std::move(framebuffer)); | ||
| 23 | } | ||
| 24 | |||
| 25 | void GPUSynch::FlushRegion(VAddr addr, u64 size) { | ||
| 26 | renderer.Rasterizer().FlushRegion(addr, size); | ||
| 27 | } | ||
| 28 | |||
| 29 | void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { | ||
| 30 | renderer.Rasterizer().InvalidateRegion(addr, size); | ||
| 31 | } | ||
| 32 | |||
| 33 | void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 34 | renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); | ||
| 35 | } | ||
| 36 | |||
| 37 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h new file mode 100644 index 000000000..7d5a241ff --- /dev/null +++ b/src/video_core/gpu_synch.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/gpu.h" | ||
| 8 | |||
| 9 | namespace VideoCore { | ||
| 10 | class RendererBase; | ||
| 11 | } // namespace VideoCore | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | /// Implementation of GPU interface that runs the GPU synchronously | ||
| 16 | class GPUSynch : public Tegra::GPU { | ||
| 17 | public: | ||
| 18 | explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); | ||
| 19 | ~GPUSynch() override; | ||
| 20 | |||
| 21 | void PushGPUEntries(Tegra::CommandList&& entries) override; | ||
| 22 | void SwapBuffers( | ||
| 23 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 24 | void FlushRegion(VAddr addr, u64 size) override; | ||
| 25 | void InvalidateRegion(VAddr addr, u64 size) override; | ||
| 26 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||
| 27 | }; | ||
| 28 | |||
| 29 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp new file mode 100644 index 000000000..c5bdd2a17 --- /dev/null +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -0,0 +1,152 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/microprofile.h" | ||
| 7 | #include "core/frontend/scope_acquire_window_context.h" | ||
| 8 | #include "core/settings.h" | ||
| 9 | #include "video_core/dma_pusher.h" | ||
| 10 | #include "video_core/gpu.h" | ||
| 11 | #include "video_core/gpu_thread.h" | ||
| 12 | #include "video_core/renderer_base.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::GPUThread { | ||
| 15 | |||
| 16 | /// Executes a single GPU thread command | ||
| 17 | static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, | ||
| 18 | Tegra::DmaPusher& dma_pusher) { | ||
| 19 | if (const auto submit_list = std::get_if<SubmitListCommand>(command)) { | ||
| 20 | dma_pusher.Push(std::move(submit_list->entries)); | ||
| 21 | dma_pusher.DispatchCalls(); | ||
| 22 | } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) { | ||
| 23 | renderer.SwapBuffers(data->framebuffer); | ||
| 24 | } else if (const auto data = std::get_if<FlushRegionCommand>(command)) { | ||
| 25 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | ||
| 26 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) { | ||
| 27 | renderer.Rasterizer().InvalidateRegion(data->addr, data->size); | ||
| 28 | } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) { | ||
| 29 | renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); | ||
| 30 | } else { | ||
| 31 | UNREACHABLE(); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | /// Runs the GPU thread | ||
| 36 | static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, | ||
| 37 | SynchState& state) { | ||
| 38 | |||
| 39 | MicroProfileOnThreadCreate("GpuThread"); | ||
| 40 | |||
| 41 | auto WaitForWakeup = [&]() { | ||
| 42 | std::unique_lock<std::mutex> lock{state.signal_mutex}; | ||
| 43 | state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; }); | ||
| 44 | }; | ||
| 45 | |||
| 46 | // Wait for first GPU command before acquiring the window context | ||
| 47 | WaitForWakeup(); | ||
| 48 | |||
| 49 | // If emulation was stopped during disk shader loading, abort before trying to acquire context | ||
| 50 | if (!state.is_running) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | |||
| 54 | Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; | ||
| 55 | |||
| 56 | while (state.is_running) { | ||
| 57 | if (!state.is_running) { | ||
| 58 | return; | ||
| 59 | } | ||
| 60 | |||
| 61 | { | ||
| 62 | // Thread has been woken up, so make the previous write queue the next read queue | ||
| 63 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | ||
| 64 | std::swap(state.push_queue, state.pop_queue); | ||
| 65 | } | ||
| 66 | |||
| 67 | // Execute all of the GPU commands | ||
| 68 | while (!state.pop_queue->empty()) { | ||
| 69 | ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); | ||
| 70 | state.pop_queue->pop(); | ||
| 71 | } | ||
| 72 | |||
| 73 | state.UpdateIdleState(); | ||
| 74 | |||
| 75 | // Signal that the GPU thread has finished processing commands | ||
| 76 | if (state.is_idle) { | ||
| 77 | state.idle_condition.notify_one(); | ||
| 78 | } | ||
| 79 | |||
| 80 | // Wait for CPU thread to send more GPU commands | ||
| 81 | WaitForWakeup(); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) | ||
| 86 | : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), | ||
| 87 | std::ref(dma_pusher), std::ref(state)}, | ||
| 88 | thread_id{thread.get_id()} {} | ||
| 89 | |||
| 90 | ThreadManager::~ThreadManager() { | ||
| 91 | { | ||
| 92 | // Notify GPU thread that a shutdown is pending | ||
| 93 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | ||
| 94 | state.is_running = false; | ||
| 95 | } | ||
| 96 | |||
| 97 | state.signal_condition.notify_one(); | ||
| 98 | thread.join(); | ||
| 99 | } | ||
| 100 | |||
| 101 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | ||
| 102 | if (entries.empty()) { | ||
| 103 | return; | ||
| 104 | } | ||
| 105 | |||
| 106 | PushCommand(SubmitListCommand(std::move(entries)), false, false); | ||
| 107 | } | ||
| 108 | |||
| 109 | void ThreadManager::SwapBuffers( | ||
| 110 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||
| 111 | PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); | ||
| 112 | } | ||
| 113 | |||
| 114 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | ||
| 115 | // Block the CPU when using accurate emulation | ||
| 116 | PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false); | ||
| 117 | } | ||
| 118 | |||
| 119 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | ||
| 120 | PushCommand(InvalidateRegionCommand(addr, size), true, true); | ||
| 121 | } | ||
| 122 | |||
| 123 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 124 | InvalidateRegion(addr, size); | ||
| 125 | } | ||
| 126 | |||
| 127 | void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { | ||
| 128 | { | ||
| 129 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | ||
| 130 | |||
| 131 | if ((allow_on_cpu && state.is_idle) || IsGpuThread()) { | ||
| 132 | // Execute the command synchronously on the current thread | ||
| 133 | ExecuteCommand(&command_data, renderer, dma_pusher); | ||
| 134 | return; | ||
| 135 | } | ||
| 136 | |||
| 137 | // Push the command to the GPU thread | ||
| 138 | state.UpdateIdleState(); | ||
| 139 | state.push_queue->emplace(command_data); | ||
| 140 | } | ||
| 141 | |||
| 142 | // Signal the GPU thread that commands are pending | ||
| 143 | state.signal_condition.notify_one(); | ||
| 144 | |||
| 145 | if (wait_for_idle) { | ||
| 146 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 147 | std::unique_lock<std::mutex> lock{state.idle_mutex}; | ||
| 148 | state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); }); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | } // namespace VideoCommon::GPUThread | ||
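RunThread is a classic producer/consumer loop: the emulation thread pushes commands and signals a condition variable, the GPU thread wakes, drains the pending queue and goes back to sleep. A condensed, runnable sketch of that flow; unlike the real code it uses a single locked queue, no idle tracking, and it executes commands while holding the lock:

    #include <condition_variable>
    #include <iostream>
    #include <mutex>
    #include <queue>
    #include <string>
    #include <thread>

    struct CommandQueue {
        std::queue<std::string> queue;
        std::mutex mutex;
        std::condition_variable cv;
        bool running = true;

        // Called from the emulation thread.
        void Push(std::string command) {
            {
                std::lock_guard lock{mutex};
                queue.push(std::move(command));
            }
            cv.notify_one();
        }

        void Stop() {
            {
                std::lock_guard lock{mutex};
                running = false;
            }
            cv.notify_one();
        }

        // Runs on the GPU thread: sleep until woken, drain everything, repeat.
        void RunConsumer() {
            std::unique_lock lock{mutex};
            while (true) {
                cv.wait(lock, [this] { return !queue.empty() || !running; });
                while (!queue.empty()) {
                    std::cout << "executing: " << queue.front() << '\n';
                    queue.pop();
                }
                if (!running) {
                    return;
                }
            }
        }
    };

    int main() {
        CommandQueue commands;
        std::thread gpu_thread{&CommandQueue::RunConsumer, &commands};
        commands.Push("SubmitList");
        commands.Push("SwapBuffers");
        commands.Stop();
        gpu_thread.join();
    }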
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h new file mode 100644 index 000000000..edb148b14 --- /dev/null +++ b/src/video_core/gpu_thread.h | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <atomic> | ||
| 9 | #include <condition_variable> | ||
| 10 | #include <memory> | ||
| 11 | #include <mutex> | ||
| 12 | #include <optional> | ||
| 13 | #include <thread> | ||
| 14 | #include <variant> | ||
| 15 | |||
| 16 | namespace Tegra { | ||
| 17 | struct FramebufferConfig; | ||
| 18 | class DmaPusher; | ||
| 19 | } // namespace Tegra | ||
| 20 | |||
| 21 | namespace VideoCore { | ||
| 22 | class RendererBase; | ||
| 23 | } // namespace VideoCore | ||
| 24 | |||
| 25 | namespace VideoCommon::GPUThread { | ||
| 26 | |||
| 27 | /// Command to signal to the GPU thread that a command list is ready for processing | ||
| 28 | struct SubmitListCommand final { | ||
| 29 | explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} | ||
| 30 | |||
| 31 | Tegra::CommandList entries; | ||
| 32 | }; | ||
| 33 | |||
| 34 | /// Command to signal to the GPU thread that a swap buffers is pending | ||
| 35 | struct SwapBuffersCommand final { | ||
| 36 | explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) | ||
| 37 | : framebuffer{std::move(framebuffer)} {} | ||
| 38 | |||
| 39 | std::optional<const Tegra::FramebufferConfig> framebuffer; | ||
| 40 | }; | ||
| 41 | |||
| 42 | /// Command to signal to the GPU thread to flush a region | ||
| 43 | struct FlushRegionCommand final { | ||
| 44 | explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} | ||
| 45 | |||
| 46 | const VAddr addr; | ||
| 47 | const u64 size; | ||
| 48 | }; | ||
| 49 | |||
| 50 | /// Command to signal to the GPU thread to invalidate a region | ||
| 51 | struct InvalidateRegionCommand final { | ||
| 52 | explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} | ||
| 53 | |||
| 54 | const VAddr addr; | ||
| 55 | const u64 size; | ||
| 56 | }; | ||
| 57 | |||
| 58 | /// Command to signal to the GPU thread to flush and invalidate a region | ||
| 59 | struct FlushAndInvalidateRegionCommand final { | ||
| 60 | explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) | ||
| 61 | : addr{addr}, size{size} {} | ||
| 62 | |||
| 63 | const VAddr addr; | ||
| 64 | const u64 size; | ||
| 65 | }; | ||
| 66 | |||
| 67 | using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | ||
| 68 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | ||
| 69 | |||
| 70 | /// Struct used to synchronize the GPU thread | ||
| 71 | struct SynchState final { | ||
| 72 | std::atomic<bool> is_running{true}; | ||
| 73 | std::atomic<bool> is_idle{true}; | ||
| 74 | std::condition_variable signal_condition; | ||
| 75 | std::mutex signal_mutex; | ||
| 76 | std::condition_variable idle_condition; | ||
| 77 | std::mutex idle_mutex; | ||
| 78 | |||
| 79 | // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and | ||
| 80 | // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes | ||
| 81 | // empty. This allows for efficient thread-safe access, as it does not require any copies. | ||
| 82 | |||
| 83 | using CommandQueue = std::queue<CommandData>; | ||
| 84 | std::array<CommandQueue, 2> command_queues; | ||
| 85 | CommandQueue* push_queue{&command_queues[0]}; | ||
| 86 | CommandQueue* pop_queue{&command_queues[1]}; | ||
| 87 | |||
| 88 | void UpdateIdleState() { | ||
| 89 | std::lock_guard<std::mutex> lock{idle_mutex}; | ||
| 90 | is_idle = command_queues[0].empty() && command_queues[1].empty(); | ||
| 91 | } | ||
| 92 | }; | ||
| 93 | |||
| 94 | /// Class used to manage the GPU thread | ||
| 95 | class ThreadManager final { | ||
| 96 | public: | ||
| 97 | explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); | ||
| 98 | ~ThreadManager(); | ||
| 99 | |||
| 100 | /// Push GPU command entries to be processed | ||
| 101 | void SubmitList(Tegra::CommandList&& entries); | ||
| 102 | |||
| 103 | /// Swap buffers (render frame) | ||
| 104 | void SwapBuffers( | ||
| 105 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); | ||
| 106 | |||
| 107 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | ||
| 108 | void FlushRegion(VAddr addr, u64 size); | ||
| 109 | |||
| 110 | /// Notify rasterizer that any caches of the specified region should be invalidated | ||
| 111 | void InvalidateRegion(VAddr addr, u64 size); | ||
| 112 | |||
| 113 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||
| 114 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||
| 115 | |||
| 116 | private: | ||
| 117 | /// Pushes a command to be executed by the GPU thread | ||
| 118 | void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); | ||
| 119 | |||
| 120 | /// Returns true if this is called by the GPU thread | ||
| 121 | bool IsGpuThread() const { | ||
| 122 | return std::this_thread::get_id() == thread_id; | ||
| 123 | } | ||
| 124 | |||
| 125 | private: | ||
| 126 | SynchState state; | ||
| 127 | VideoCore::RendererBase& renderer; | ||
| 128 | Tegra::DmaPusher& dma_pusher; | ||
| 129 | std::thread thread; | ||
| 130 | std::thread::id thread_id; | ||
| 131 | }; | ||
| 132 | |||
| 133 | } // namespace VideoCommon::GPUThread | ||
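CommandData is a std::variant over small per-command structs, and the worker dispatches on it with std::get_if, as ExecuteCommand does above. A trimmed stand-alone version with just two command types:

    #include <cstdint>
    #include <iostream>
    #include <variant>

    struct FlushRegionCommand {
        std::uint64_t addr;
        std::uint64_t size;
    };

    struct SwapBuffersCommand {
        int frame_id;
    };

    using CommandData = std::variant<FlushRegionCommand, SwapBuffersCommand>;

    // std::get_if returns a pointer to the held alternative, or nullptr if it holds another one.
    void ExecuteCommand(const CommandData& command) {
        if (const auto* flush = std::get_if<FlushRegionCommand>(&command)) {
            std::cout << std::hex << "flush addr=0x" << flush->addr << " size=0x" << flush->size
                      << std::dec << '\n';
        } else if (const auto* swap = std::get_if<SwapBuffersCommand>(&command)) {
            std::cout << "swap buffers, frame " << swap->frame_id << '\n';
        }
    }

    int main() {
        ExecuteCommand(FlushRegionCommand{0x1000, 0x200});
        ExecuteCommand(SwapBuffersCommand{42});
    }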
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 94223f45f..919d1f2d4 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "common/logging/log.h" | ||
| 5 | #include "core/frontend/emu_window.h" | 6 | #include "core/frontend/emu_window.h" |
| 6 | #include "core/settings.h" | 7 | #include "core/settings.h" |
| 7 | #include "video_core/renderer_base.h" | 8 | #include "video_core/renderer_base.h" |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c8c1d6911..824863561 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst | |||
| 118 | 118 | ||
| 119 | glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); | 119 | glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); |
| 120 | 120 | ||
| 121 | LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); | 121 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 122 | CheckExtensions(); | 122 | CheckExtensions(); |
| 123 | } | 123 | } |
| 124 | 124 | ||
| @@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 177 | continue; | 177 | continue; |
| 178 | 178 | ||
| 179 | const auto& buffer = regs.vertex_array[attrib.buffer]; | 179 | const auto& buffer = regs.vertex_array[attrib.buffer]; |
| 180 | LOG_TRACE(HW_GPU, | 180 | LOG_TRACE(Render_OpenGL, |
| 181 | "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", | 181 | "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", |
| 182 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), | 182 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), |
| 183 | attrib.offset.Value(), attrib.IsNormalized()); | 183 | attrib.offset.Value(), attrib.IsNormalized()); |
| @@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 343 | shader_program_manager->UseProgrammableFragmentShader(program_handle); | 343 | shader_program_manager->UseProgrammableFragmentShader(program_handle); |
| 344 | break; | 344 | break; |
| 345 | default: | 345 | default: |
| 346 | LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | 346 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, |
| 347 | shader_config.enable.Value(), shader_config.offset); | 347 | shader_config.enable.Value(), shader_config.offset); |
| 348 | UNREACHABLE(); | ||
| 349 | } | 348 | } |
| 350 | 349 | ||
| 351 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); | 350 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
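Several hunks in this file replace LOG_CRITICAL plus UNREACHABLE pairs with a single UNIMPLEMENTED_MSG / UNIMPLEMENTED_IF_MSG. A rough sketch of what such a helper boils down to, log and assert in one statement; the real macros live in common/assert.h, use fmt-style formatting and differ in detail:

    #include <cstdio>
    #include <cstdlib>

    // Sketch only: report the unimplemented condition with file/line context, then abort.
    #define SKETCH_UNIMPLEMENTED_IF_MSG(cond, msg)                                            \
        do {                                                                                  \
            if (cond) {                                                                       \
                std::fprintf(stderr, "Unimplemented: %s (%s:%d)\n", msg, __FILE__, __LINE__); \
                std::abort();                                                                 \
            }                                                                                 \
        } while (0)

    int main() {
        const int tfb_enabled = 0;
        SKETCH_UNIMPLEMENTED_IF_MSG(tfb_enabled != 0, "Transform feedbacks are not implemented");
        std::puts("ok");
    }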
| @@ -739,33 +738,17 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 739 | state.Apply(); | 738 | state.Apply(); |
| 740 | 739 | ||
| 741 | res_cache.SignalPreDrawCall(); | 740 | res_cache.SignalPreDrawCall(); |
| 742 | |||
| 743 | // Execute draw call | ||
| 744 | params.DispatchDraw(); | 741 | params.DispatchDraw(); |
| 745 | |||
| 746 | res_cache.SignalPostDrawCall(); | 742 | res_cache.SignalPostDrawCall(); |
| 747 | 743 | ||
| 748 | // Disable scissor test | ||
| 749 | state.viewports[0].scissor.enabled = false; | ||
| 750 | |||
| 751 | accelerate_draw = AccelDraw::Disabled; | 744 | accelerate_draw = AccelDraw::Disabled; |
| 752 | |||
| 753 | // Unbind textures for potential future use as framebuffer attachments | ||
| 754 | for (auto& texture_unit : state.texture_units) { | ||
| 755 | texture_unit.Unbind(); | ||
| 756 | } | ||
| 757 | state.Apply(); | ||
| 758 | } | 745 | } |
| 759 | 746 | ||
| 760 | void RasterizerOpenGL::FlushAll() {} | 747 | void RasterizerOpenGL::FlushAll() {} |
| 761 | 748 | ||
| 762 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | 749 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { |
| 763 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 750 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 764 | 751 | res_cache.FlushRegion(addr, size); | |
| 765 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 766 | // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit | ||
| 767 | res_cache.FlushRegion(addr, size); | ||
| 768 | } | ||
| 769 | } | 752 | } |
| 770 | 753 | ||
| 771 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 754 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -809,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 809 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | 792 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; |
| 810 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | 793 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); |
| 811 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | 794 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); |
| 812 | ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); | 795 | |
| 796 | if (params.pixel_format != pixel_format) { | ||
| 797 | LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); | ||
| 798 | } | ||
| 813 | 799 | ||
| 814 | screen_info.display_texture = surface->Texture().handle; | 800 | screen_info.display_texture = surface->Texture().handle; |
| 815 | 801 | ||
| @@ -818,104 +804,87 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 818 | 804 | ||
| 819 | void RasterizerOpenGL::SamplerInfo::Create() { | 805 | void RasterizerOpenGL::SamplerInfo::Create() { |
| 820 | sampler.Create(); | 806 | sampler.Create(); |
| 821 | mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; | 807 | mag_filter = Tegra::Texture::TextureFilter::Linear; |
| 822 | wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; | 808 | min_filter = Tegra::Texture::TextureFilter::Linear; |
| 823 | uses_depth_compare = false; | 809 | wrap_u = Tegra::Texture::WrapMode::Wrap; |
| 810 | wrap_v = Tegra::Texture::WrapMode::Wrap; | ||
| 811 | wrap_p = Tegra::Texture::WrapMode::Wrap; | ||
| 812 | use_depth_compare = false; | ||
| 824 | depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; | 813 | depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; |
| 825 | 814 | ||
| 826 | // default is GL_LINEAR_MIPMAP_LINEAR | 815 | // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR |
| 827 | glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); | 816 | glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); |
| 828 | // Other attributes have correct defaults | ||
| 829 | glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); | 817 | glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); |
| 818 | |||
| 819 | // Other attributes have correct defaults | ||
| 830 | } | 820 | } |
| 831 | 821 | ||
| 832 | void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { | 822 | void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { |
| 833 | const GLuint s = sampler.handle; | 823 | const GLuint sampler_id = sampler.handle; |
| 834 | if (mag_filter != config.mag_filter) { | 824 | if (mag_filter != config.mag_filter) { |
| 835 | mag_filter = config.mag_filter; | 825 | mag_filter = config.mag_filter; |
| 836 | glSamplerParameteri( | 826 | glSamplerParameteri( |
| 837 | s, GL_TEXTURE_MAG_FILTER, | 827 | sampler_id, GL_TEXTURE_MAG_FILTER, |
| 838 | MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); | 828 | MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); |
| 839 | } | 829 | } |
| 840 | if (min_filter != config.min_filter || mip_filter != config.mip_filter) { | 830 | if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) { |
| 841 | min_filter = config.min_filter; | 831 | min_filter = config.min_filter; |
| 842 | mip_filter = config.mip_filter; | 832 | mipmap_filter = config.mipmap_filter; |
| 843 | glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, | 833 | glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, |
| 844 | MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); | 834 | MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter)); |
| 845 | } | 835 | } |
| 846 | 836 | ||
| 847 | if (wrap_u != config.wrap_u) { | 837 | if (wrap_u != config.wrap_u) { |
| 848 | wrap_u = config.wrap_u; | 838 | wrap_u = config.wrap_u; |
| 849 | glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); | 839 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); |
| 850 | } | 840 | } |
| 851 | if (wrap_v != config.wrap_v) { | 841 | if (wrap_v != config.wrap_v) { |
| 852 | wrap_v = config.wrap_v; | 842 | wrap_v = config.wrap_v; |
| 853 | glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); | 843 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); |
| 854 | } | 844 | } |
| 855 | if (wrap_p != config.wrap_p) { | 845 | if (wrap_p != config.wrap_p) { |
| 856 | wrap_p = config.wrap_p; | 846 | wrap_p = config.wrap_p; |
| 857 | glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); | 847 | glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); |
| 858 | } | 848 | } |
| 859 | 849 | ||
| 860 | if (uses_depth_compare != (config.depth_compare_enabled == 1)) { | 850 | if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) { |
| 861 | uses_depth_compare = (config.depth_compare_enabled == 1); | 851 | use_depth_compare = enabled; |
| 862 | if (uses_depth_compare) { | 852 | glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, |
| 863 | glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); | 853 | use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); |
| 864 | } else { | ||
| 865 | glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE); | ||
| 866 | } | ||
| 867 | } | 854 | } |
| 868 | 855 | ||
| 869 | if (depth_compare_func != config.depth_compare_func) { | 856 | if (depth_compare_func != config.depth_compare_func) { |
| 870 | depth_compare_func = config.depth_compare_func; | 857 | depth_compare_func = config.depth_compare_func; |
| 871 | glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, | 858 | glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, |
| 872 | MaxwellToGL::DepthCompareFunc(depth_compare_func)); | 859 | MaxwellToGL::DepthCompareFunc(depth_compare_func)); |
| 873 | } | 860 | } |
| 874 | 861 | ||
| 875 | GLvec4 new_border_color; | 862 | if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) { |
| 876 | if (config.srgb_conversion) { | ||
| 877 | new_border_color[0] = config.srgb_border_color_r / 255.0f; | ||
| 878 | new_border_color[1] = config.srgb_border_color_g / 255.0f; | ||
| 879 | new_border_color[2] = config.srgb_border_color_g / 255.0f; | ||
| 880 | } else { | ||
| 881 | new_border_color[0] = config.border_color_r; | ||
| 882 | new_border_color[1] = config.border_color_g; | ||
| 883 | new_border_color[2] = config.border_color_b; | ||
| 884 | } | ||
| 885 | new_border_color[3] = config.border_color_a; | ||
| 886 | |||
| 887 | if (border_color != new_border_color) { | ||
| 888 | border_color = new_border_color; | 863 | border_color = new_border_color; |
| 889 | glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); | 864 | glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data()); |
| 890 | } | 865 | } |
| 891 | 866 | ||
| 892 | const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value()); | 867 | if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) { |
| 893 | if (anisotropic_max != max_anisotropic) { | 868 | max_anisotropic = anisotropic; |
| 894 | max_anisotropic = anisotropic_max; | ||
| 895 | if (GLAD_GL_ARB_texture_filter_anisotropic) { | 869 | if (GLAD_GL_ARB_texture_filter_anisotropic) { |
| 896 | glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); | 870 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); |
| 897 | } else if (GLAD_GL_EXT_texture_filter_anisotropic) { | 871 | } else if (GLAD_GL_EXT_texture_filter_anisotropic) { |
| 898 | glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); | 872 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); |
| 899 | } | 873 | } |
| 900 | } | 874 | } |
| 901 | const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f; | ||
| 902 | if (lod_min != min_lod) { | ||
| 903 | min_lod = lod_min; | ||
| 904 | glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod); | ||
| 905 | } | ||
| 906 | 875 | ||
| 907 | const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f; | 876 | if (const float min = config.GetMinLod(); min_lod != min) { |
| 908 | if (lod_max != max_lod) { | 877 | min_lod = min; |
| 909 | max_lod = lod_max; | 878 | glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod); |
| 910 | glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod); | ||
| 911 | } | 879 | } |
| 912 | const u32 bias = config.mip_lod_bias.Value(); | 880 | if (const float max = config.GetMaxLod(); max_lod != max) { |
| 913 | // Sign extend the 13-bit value. | 881 | max_lod = max; |
| 914 | constexpr u32 mask = 1U << (13 - 1); | 882 | glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod); |
| 915 | const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; | 883 | } |
| 916 | if (lod_bias != bias_lod) { | 884 | |
| 917 | lod_bias = bias_lod; | 885 | if (const float bias = config.GetLodBias(); lod_bias != bias) { |
| 918 | glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias); | 886 | lod_bias = bias; |
| 887 | glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias); | ||
| 919 | } | 888 | } |
| 920 | } | 889 | } |
| 921 | 890 | ||
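Two details of the SyncWithConfig refactor are worth spelling out: every sampler field keeps a cached copy and the driver is only called when the guest value actually changed, and the LOD bias that the old code computed inline (and which presumably now lives behind a TSCEntry::GetLodBias helper) is a 13-bit two's-complement fixed-point value with 8 fractional bits. A distilled sketch of both, with a printed stand-in for glSamplerParameteri so it runs without a GL context:

    #include <cstdint>
    #include <iostream>

    enum class Filter { Nearest, Linear };

    struct SamplerState {
        Filter mag_filter = Filter::Nearest;

        // Only touch the driver when the guest-configured value changed,
        // exactly the filtering SyncWithConfig performs for every field.
        void SyncMagFilter(Filter requested) {
            if (mag_filter == requested) {
                return; // redundant state change, no driver call
            }
            mag_filter = requested;
            std::cout << "driver call: mag filter = "
                      << (requested == Filter::Linear ? "linear" : "nearest") << '\n';
        }
    };

    // 13-bit two's-complement fixed point with 8 fractional bits, matching the removed
    // inline math: sign-extend via (bias ^ mask) - mask, then divide by 256.
    float LodBiasFromRaw(std::uint32_t raw_bias) {
        constexpr std::uint32_t mask = 1u << 12;
        const std::int32_t extended = static_cast<std::int32_t>((raw_bias ^ mask) - mask);
        return static_cast<float>(extended) / 256.0f;
    }

    int main() {
        SamplerState sampler;
        sampler.SyncMagFilter(Filter::Linear); // one driver call
        sampler.SyncMagFilter(Filter::Linear); // filtered out, value unchanged

        std::cout << LodBiasFromRaw(0x0100) << '\n'; // prints 1  (+1.0 LOD bias)
        std::cout << LodBiasFromRaw(0x1F00) << '\n'; // prints -1 (sign bit set)
    }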
| @@ -955,8 +924,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader | |||
| 955 | size = buffer.size; | 924 | size = buffer.size; |
| 956 | 925 | ||
| 957 | if (size > MaxConstbufferSize) { | 926 | if (size > MaxConstbufferSize) { |
| 958 | LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, | 927 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, |
| 959 | MaxConstbufferSize); | 928 | MaxConstbufferSize); |
| 960 | size = MaxConstbufferSize; | 929 | size = MaxConstbufferSize; |
| 961 | } | 930 | } |
| 962 | } else { | 931 | } else { |
| @@ -1016,10 +985,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s | |||
| 1016 | 985 | ||
| 1017 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); | 986 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); |
| 1018 | 987 | ||
| 1019 | Surface surface = res_cache.GetTextureSurface(texture, entry); | 988 | if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { |
| 1020 | if (surface != nullptr) { | ||
| 1021 | state.texture_units[current_bindpoint].texture = | 989 | state.texture_units[current_bindpoint].texture = |
| 1022 | entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; | 990 | surface->Texture(entry.IsArray()).handle; |
| 1023 | surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | 991 | surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, |
| 1024 | texture.tic.w_source); | 992 | texture.tic.w_source); |
| 1025 | } else { | 993 | } else { |
| @@ -1251,11 +1219,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { | |||
| 1251 | 1219 | ||
| 1252 | void RasterizerOpenGL::SyncTransformFeedback() { | 1220 | void RasterizerOpenGL::SyncTransformFeedback() { |
| 1253 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 1221 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 1254 | 1222 | UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); | |
| 1255 | if (regs.tfb_enabled != 0) { | ||
| 1256 | LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented"); | ||
| 1257 | UNREACHABLE(); | ||
| 1258 | } | ||
| 1259 | } | 1223 | } |
| 1260 | 1224 | ||
| 1261 | void RasterizerOpenGL::SyncPointState() { | 1225 | void RasterizerOpenGL::SyncPointState() { |
| @@ -1275,12 +1239,8 @@ void RasterizerOpenGL::SyncPolygonOffset() { | |||
| 1275 | 1239 | ||
| 1276 | void RasterizerOpenGL::CheckAlphaTests() { | 1240 | void RasterizerOpenGL::CheckAlphaTests() { |
| 1277 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 1241 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 1278 | 1242 | UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, | |
| 1279 | if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { | 1243 | "Alpha Testing is enabled with more than one rendertarget"); |
| 1280 | LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, " | ||
| 1281 | "this behavior is undefined."); | ||
| 1282 | UNREACHABLE(); | ||
| 1283 | } | ||
| 1284 | } | 1244 | } |
| 1285 | 1245 | ||
| 1286 | } // namespace OpenGL | 1246 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2f0524f85..7e63f8008 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -94,11 +94,12 @@ private: | |||
| 94 | private: | 94 | private: |
| 95 | Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; | 95 | Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; |
| 96 | Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; | 96 | Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; |
| 97 | Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None; | 97 | Tegra::Texture::TextureMipmapFilter mipmap_filter = |
| 98 | Tegra::Texture::TextureMipmapFilter::None; | ||
| 98 | Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; | 99 | Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; |
| 99 | Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; | 100 | Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; |
| 100 | Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; | 101 | Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; |
| 101 | bool uses_depth_compare = false; | 102 | bool use_depth_compare = false; |
| 102 | Tegra::Texture::DepthCompareFunc depth_compare_func = | 103 | Tegra::Texture::DepthCompareFunc depth_compare_func = |
| 103 | Tegra::Texture::DepthCompareFunc::Always; | 104 | Tegra::Texture::DepthCompareFunc::Always; |
| 104 | GLvec4 border_color = {}; | 105 | GLvec4 border_color = {}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index b5a9722f9..e9eb6e921 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 21 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 22 | #include "video_core/renderer_opengl/utils.h" | 22 | #include "video_core/renderer_opengl/utils.h" |
| 23 | #include "video_core/surface.h" | 23 | #include "video_core/surface.h" |
| 24 | #include "video_core/textures/astc.h" | 24 | #include "video_core/textures/convert.h" |
| 25 | #include "video_core/textures/decoders.h" | 25 | #include "video_core/textures/decoders.h" |
| 26 | 26 | ||
| 27 | namespace OpenGL { | 27 | namespace OpenGL { |
| @@ -400,6 +400,27 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType | |||
| 400 | return format; | 400 | return format; |
| 401 | } | 401 | } |
| 402 | 402 | ||
| 403 | /// Returns the discrepant array target | ||
| 404 | constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) { | ||
| 405 | switch (target) { | ||
| 406 | case SurfaceTarget::Texture1D: | ||
| 407 | return GL_TEXTURE_1D_ARRAY; | ||
| 408 | case SurfaceTarget::Texture2D: | ||
| 409 | return GL_TEXTURE_2D_ARRAY; | ||
| 410 | case SurfaceTarget::Texture3D: | ||
| 411 | return GL_NONE; | ||
| 412 | case SurfaceTarget::Texture1DArray: | ||
| 413 | return GL_TEXTURE_1D; | ||
| 414 | case SurfaceTarget::Texture2DArray: | ||
| 415 | return GL_TEXTURE_2D; | ||
| 416 | case SurfaceTarget::TextureCubemap: | ||
| 417 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 418 | case SurfaceTarget::TextureCubeArray: | ||
| 419 | return GL_TEXTURE_CUBE_MAP; | ||
| 420 | } | ||
| 421 | return GL_NONE; | ||
| 422 | } | ||
| 423 | |||
| 403 | Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { | 424 | Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { |
| 404 | u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; | 425 | u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; |
| 405 | if (IsPixelFormatASTC(pixel_format)) { | 426 | if (IsPixelFormatASTC(pixel_format)) { |
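Because GetArrayDiscrepantTarget is constexpr, the array/non-array mapping can be verified at compile time. A trimmed stand-alone version with static_asserts; the GLenum values are replaced by a plain enum here so it builds without GL headers:

    #include <cstdint>

    enum class Target : std::uint32_t { None, Tex1D, Tex1DArray, Tex2D, Tex2DArray };

    constexpr Target ArrayDiscrepantTarget(Target target) {
        switch (target) {
        case Target::Tex1D:
            return Target::Tex1DArray; // non-array surface sampled as an array
        case Target::Tex1DArray:
            return Target::Tex1D;      // array surface sampled as non-array
        case Target::Tex2D:
            return Target::Tex2DArray;
        case Target::Tex2DArray:
            return Target::Tex2D;
        default:
            return Target::None;       // e.g. 3D textures have no array form
        }
    }

    static_assert(ArrayDiscrepantTarget(Target::Tex2D) == Target::Tex2DArray);
    static_assert(ArrayDiscrepantTarget(ArrayDiscrepantTarget(Target::Tex2D)) == Target::Tex2D);

    int main() {}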
| @@ -597,103 +618,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 597 | } | 618 | } |
| 598 | } | 619 | } |
| 599 | 620 | ||
| 600 | static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) { | ||
| 601 | union S8Z24 { | ||
| 602 | BitField<0, 24, u32> z24; | ||
| 603 | BitField<24, 8, u32> s8; | ||
| 604 | }; | ||
| 605 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 606 | |||
| 607 | union Z24S8 { | ||
| 608 | BitField<0, 8, u32> s8; | ||
| 609 | BitField<8, 24, u32> z24; | ||
| 610 | }; | ||
| 611 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 612 | |||
| 613 | S8Z24 s8z24_pixel{}; | ||
| 614 | Z24S8 z24s8_pixel{}; | ||
| 615 | constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)}; | ||
| 616 | for (std::size_t y = 0; y < height; ++y) { | ||
| 617 | for (std::size_t x = 0; x < width; ++x) { | ||
| 618 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 619 | if (reverse) { | ||
| 620 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 621 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 622 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 623 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 624 | } else { | ||
| 625 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 626 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 627 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 628 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 629 | } | ||
| 630 | } | ||
| 631 | } | ||
| 632 | } | ||
| 633 | |||
| 634 | /** | ||
| 635 | * Helper function to perform software conversion (as needed) when loading a buffer from Switch | ||
| 636 | * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with | ||
| 637 | * typical desktop GPUs. | ||
| 638 | */ | ||
| 639 | static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | ||
| 640 | u32 width, u32 height, u32 depth) { | ||
| 641 | switch (pixel_format) { | ||
| 642 | case PixelFormat::ASTC_2D_4X4: | ||
| 643 | case PixelFormat::ASTC_2D_8X8: | ||
| 644 | case PixelFormat::ASTC_2D_8X5: | ||
| 645 | case PixelFormat::ASTC_2D_5X4: | ||
| 646 | case PixelFormat::ASTC_2D_5X5: | ||
| 647 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 648 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 649 | case PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 650 | case PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 651 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 652 | case PixelFormat::ASTC_2D_10X8: | ||
| 653 | case PixelFormat::ASTC_2D_10X8_SRGB: { | ||
| 654 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 655 | u32 block_width{}; | ||
| 656 | u32 block_height{}; | ||
| 657 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 658 | data = | ||
| 659 | Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); | ||
| 660 | break; | ||
| 661 | } | ||
| 662 | case PixelFormat::S8Z24: | ||
| 663 | // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. | ||
| 664 | ConvertS8Z24ToZ24S8(data, width, height, false); | ||
| 665 | break; | ||
| 666 | } | ||
| 667 | } | ||
| 668 | |||
| 669 | /** | ||
| 670 | * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to | ||
| 671 | * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or | ||
| 672 | * with typical desktop GPUs. | ||
| 673 | */ | ||
| 674 | static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | ||
| 675 | u32 width, u32 height) { | ||
| 676 | switch (pixel_format) { | ||
| 677 | case PixelFormat::ASTC_2D_4X4: | ||
| 678 | case PixelFormat::ASTC_2D_8X8: | ||
| 679 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 680 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 681 | case PixelFormat::ASTC_2D_5X5: | ||
| 682 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 683 | case PixelFormat::ASTC_2D_10X8: | ||
| 684 | case PixelFormat::ASTC_2D_10X8_SRGB: { | ||
| 685 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 686 | static_cast<u32>(pixel_format)); | ||
| 687 | UNREACHABLE(); | ||
| 688 | break; | ||
| 689 | } | ||
| 690 | case PixelFormat::S8Z24: | ||
| 691 | // Convert the Z24S8 depth format back to S8Z24, as guest memory stores it as S8Z24. | ||
| 692 | ConvertS8Z24ToZ24S8(data, width, height, true); | ||
| 693 | break; | ||
| 694 | } | ||
| 695 | } | ||
| 696 | |||
| 697 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); | 621 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); |
| 698 | void CachedSurface::LoadGLBuffer() { | 622 | void CachedSurface::LoadGLBuffer() { |
| 699 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); | 623 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); |
| @@ -722,8 +646,16 @@ void CachedSurface::LoadGLBuffer() { | |||
| 722 | } | 646 | } |
| 723 | } | 647 | } |
| 724 | for (u32 i = 0; i < params.max_mip_level; i++) { | 648 | for (u32 i = 0; i < params.max_mip_level; i++) { |
| 725 | ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), | 649 | const u32 width = params.MipWidth(i); |
| 726 | params.MipHeight(i), params.MipDepth(i)); | 650 | const u32 height = params.MipHeight(i); |
| 651 | const u32 depth = params.MipDepth(i); | ||
| 652 | if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) { | ||
| 653 | // Reserve size for RGBA8 conversion | ||
| 654 | constexpr std::size_t rgba_bpp = 4; | ||
| 655 | gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp)); | ||
| 656 | } | ||
| 657 | Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width, | ||
| 658 | height, depth, true, true); | ||
| 727 | } | 659 | } |
| 728 | } | 660 | } |
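The resize before ConvertFromGuestToHost is what makes the in-place ASTC path safe: the compressed mip is far smaller than its RGBA8 equivalent, so the buffer must grow first. A rough illustration of the sizes involved, assuming the standard 16-byte ASTC block and illustrative mip dimensions:

#include <cstddef>

constexpr std::size_t width = 256;       // illustrative mip dimensions
constexpr std::size_t height = 256;
constexpr std::size_t block_width = 8;   // ASTC_2D_8X8 footprint
constexpr std::size_t block_height = 8;

constexpr std::size_t compressed_bytes =
    (width / block_width) * (height / block_height) * 16; // 16 KiB of ASTC blocks
constexpr std::size_t rgba8_bytes = width * height * 4;   // 256 KiB after decompression

static_assert(rgba8_bytes > compressed_bytes,
              "gl_buffer must be resized before the RGBA8 conversion runs in place");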
| 729 | 661 | ||
| @@ -746,8 +678,8 @@ void CachedSurface::FlushGLBuffer() { | |||
| 746 | glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, | 678 | glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, |
| 747 | static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); | 679 | static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); |
| 748 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | 680 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); |
| 749 | ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, | 681 | Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, |
| 750 | params.height); | 682 | params.height, params.depth, true, true); |
| 751 | const u8* const texture_src_data = Memory::GetPointer(params.addr); | 683 | const u8* const texture_src_data = Memory::GetPointer(params.addr); |
| 752 | ASSERT(texture_src_data); | 684 | ASSERT(texture_src_data); |
| 753 | if (params.is_tiled) { | 685 | if (params.is_tiled) { |
| @@ -884,20 +816,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, | |||
| 884 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | 816 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); |
| 885 | } | 817 | } |
| 886 | 818 | ||
| 887 | void CachedSurface::EnsureTextureView() { | 819 | void CachedSurface::EnsureTextureDiscrepantView() { |
| 888 | if (texture_view.handle != 0) | 820 | if (discrepant_view.handle != 0) |
| 889 | return; | 821 | return; |
| 890 | 822 | ||
| 891 | const GLenum target{TargetLayer()}; | 823 | const GLenum target{GetArrayDiscrepantTarget(params.target)}; |
| 824 | ASSERT(target != GL_NONE); | ||
| 825 | |||
| 892 | const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; | 826 | const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; |
| 893 | constexpr GLuint min_layer = 0; | 827 | constexpr GLuint min_layer = 0; |
| 894 | constexpr GLuint min_level = 0; | 828 | constexpr GLuint min_level = 0; |
| 895 | 829 | ||
| 896 | glGenTextures(1, &texture_view.handle); | 830 | glGenTextures(1, &discrepant_view.handle); |
| 897 | glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, | 831 | glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level, |
| 898 | params.max_mip_level, min_layer, num_layers); | 832 | params.max_mip_level, min_layer, num_layers); |
| 899 | ApplyTextureDefaults(texture_view.handle, params.max_mip_level); | 833 | ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level); |
| 900 | glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, | 834 | glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, |
| 901 | reinterpret_cast<const GLint*>(swizzle.data())); | 835 | reinterpret_cast<const GLint*>(swizzle.data())); |
| 902 | } | 836 | } |
| 903 | 837 | ||
| @@ -923,8 +857,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, | |||
| 923 | swizzle = {new_x, new_y, new_z, new_w}; | 857 | swizzle = {new_x, new_y, new_z, new_w}; |
| 924 | const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); | 858 | const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); |
| 925 | glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); | 859 | glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); |
| 926 | if (texture_view.handle != 0) { | 860 | if (discrepant_view.handle != 0) { |
| 927 | glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); | 861 | glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); |
| 928 | } | 862 | } |
| 929 | } | 863 | } |
| 930 | 864 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 797bbdc9c..9cf6f50be 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -367,31 +367,19 @@ public: | |||
| 367 | return texture; | 367 | return texture; |
| 368 | } | 368 | } |
| 369 | 369 | ||
| 370 | const OGLTexture& TextureLayer() { | 370 | const OGLTexture& Texture(bool as_array) { |
| 371 | if (params.is_array) { | 371 | if (params.is_array == as_array) { |
| 372 | return Texture(); | 372 | return texture; |
| 373 | } else { | ||
| 374 | EnsureTextureDiscrepantView(); | ||
| 375 | return discrepant_view; | ||
| 373 | } | 376 | } |
| 374 | EnsureTextureView(); | ||
| 375 | return texture_view; | ||
| 376 | } | 377 | } |
| 377 | 378 | ||
| 378 | GLenum Target() const { | 379 | GLenum Target() const { |
| 379 | return gl_target; | 380 | return gl_target; |
| 380 | } | 381 | } |
| 381 | 382 | ||
| 382 | GLenum TargetLayer() const { | ||
| 383 | using VideoCore::Surface::SurfaceTarget; | ||
| 384 | switch (params.target) { | ||
| 385 | case SurfaceTarget::Texture1D: | ||
| 386 | return GL_TEXTURE_1D_ARRAY; | ||
| 387 | case SurfaceTarget::Texture2D: | ||
| 388 | return GL_TEXTURE_2D_ARRAY; | ||
| 389 | case SurfaceTarget::TextureCubemap: | ||
| 390 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 391 | } | ||
| 392 | return Target(); | ||
| 393 | } | ||
| 394 | |||
| 395 | const SurfaceParams& GetSurfaceParams() const { | 383 | const SurfaceParams& GetSurfaceParams() const { |
| 396 | return params; | 384 | return params; |
| 397 | } | 385 | } |
| @@ -431,10 +419,10 @@ public: | |||
| 431 | private: | 419 | private: |
| 432 | void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); | 420 | void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); |
| 433 | 421 | ||
| 434 | void EnsureTextureView(); | 422 | void EnsureTextureDiscrepantView(); |
| 435 | 423 | ||
| 436 | OGLTexture texture; | 424 | OGLTexture texture; |
| 437 | OGLTexture texture_view; | 425 | OGLTexture discrepant_view; |
| 438 | std::vector<std::vector<u8>> gl_buffer; | 426 | std::vector<std::vector<u8>> gl_buffer; |
| 439 | SurfaceParams params{}; | 427 | SurfaceParams params{}; |
| 440 | GLenum gl_target{}; | 428 | GLenum gl_target{}; |
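A hypothetical call site for the new accessor; the function and variable names below are illustrative, not taken from the rasterizer. When the shader samples the surface with a different array-ness than it was created with, the lazily created discrepant view is returned instead of the base texture:

// Illustrative only; CachedSurface::Texture(bool) is the accessor declared above, while
// `unit` and `wants_array` stand in for the rasterizer's real inputs.
void BindSurfaceSketch(CachedSurface& surface, GLuint unit, bool wants_array) {
    const GLuint handle = surface.Texture(wants_array).handle;
    glBindTextureUnit(unit, handle); // resolves to the discrepant array view when needed
}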
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 72ff6ac6a..11d1169f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -5,7 +5,9 @@ | |||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <string> | 6 | #include <string> |
| 7 | #include <string_view> | 7 | #include <string_view> |
| 8 | #include <utility> | ||
| 8 | #include <variant> | 9 | #include <variant> |
| 10 | #include <vector> | ||
| 9 | 11 | ||
| 10 | #include <fmt/format.h> | 12 | #include <fmt/format.h> |
| 11 | 13 | ||
| @@ -717,7 +719,7 @@ private: | |||
| 717 | } | 719 | } |
| 718 | 720 | ||
| 719 | std::string GenerateTexture(Operation operation, const std::string& func, | 721 | std::string GenerateTexture(Operation operation, const std::string& func, |
| 720 | bool is_extra_int = false) { | 722 | const std::vector<std::pair<Type, Node>>& extras) { |
| 721 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | 723 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; |
| 722 | 724 | ||
| 723 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 725 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| @@ -738,36 +740,47 @@ private: | |||
| 738 | expr += Visit(operation[i]); | 740 | expr += Visit(operation[i]); |
| 739 | 741 | ||
| 740 | const std::size_t next = i + 1; | 742 | const std::size_t next = i + 1; |
| 741 | if (next < count || has_array || has_shadow) | 743 | if (next < count) |
| 742 | expr += ", "; | 744 | expr += ", "; |
| 743 | } | 745 | } |
| 744 | if (has_array) { | 746 | if (has_array) { |
| 745 | expr += "float(ftoi(" + Visit(meta->array) + "))"; | 747 | expr += ", float(ftoi(" + Visit(meta->array) + "))"; |
| 746 | } | 748 | } |
| 747 | if (has_shadow) { | 749 | if (has_shadow) { |
| 748 | if (has_array) | 750 | expr += ", " + Visit(meta->depth_compare); |
| 749 | expr += ", "; | ||
| 750 | expr += Visit(meta->depth_compare); | ||
| 751 | } | 751 | } |
| 752 | expr += ')'; | 752 | expr += ')'; |
| 753 | 753 | ||
| 754 | for (const Node extra : meta->extras) { | 754 | for (const auto& extra_pair : extras) { |
| 755 | const auto [type, operand] = extra_pair; | ||
| 756 | if (operand == nullptr) { | ||
| 757 | continue; | ||
| 758 | } | ||
| 755 | expr += ", "; | 759 | expr += ", "; |
| 756 | if (is_extra_int) { | 760 | |
| 757 | if (const auto immediate = std::get_if<ImmediateNode>(extra)) { | 761 | switch (type) { |
| 762 | case Type::Int: | ||
| 763 | if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | ||
| 758 | // Inline the string as an immediate integer in GLSL (some extra arguments are | 764 | // Inline the string as an immediate integer in GLSL (some extra arguments are |
| 759 | // required to be constant) | 765 | // required to be constant) |
| 760 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | 766 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); |
| 761 | } else { | 767 | } else { |
| 762 | expr += "ftoi(" + Visit(extra) + ')'; | 768 | expr += "ftoi(" + Visit(operand) + ')'; |
| 763 | } | 769 | } |
| 764 | } else { | 770 | break; |
| 765 | expr += Visit(extra); | 771 | case Type::Float: |
| 772 | expr += Visit(operand); | ||
| 773 | break; | ||
| 774 | default: { | ||
| 775 | const auto type_int = static_cast<u32>(type); | ||
| 776 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||
| 777 | expr += '0'; | ||
| 778 | break; | ||
| 779 | } | ||
| 766 | } | 780 | } |
| 767 | } | 781 | } |
| 768 | 782 | ||
| 769 | expr += ')'; | 783 | return expr + ')'; |
| 770 | return expr; | ||
| 771 | } | 784 | } |
| 772 | 785 | ||
| 773 | std::string Assign(Operation operation) { | 786 | std::string Assign(Operation operation) { |
| @@ -1146,7 +1159,7 @@ private: | |||
| 1146 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1159 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1147 | ASSERT(meta); | 1160 | ASSERT(meta); |
| 1148 | 1161 | ||
| 1149 | std::string expr = GenerateTexture(operation, "texture"); | 1162 | std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); |
| 1150 | if (meta->sampler.IsShadow()) { | 1163 | if (meta->sampler.IsShadow()) { |
| 1151 | expr = "vec4(" + expr + ')'; | 1164 | expr = "vec4(" + expr + ')'; |
| 1152 | } | 1165 | } |
| @@ -1157,7 +1170,7 @@ private: | |||
| 1157 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1170 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1158 | ASSERT(meta); | 1171 | ASSERT(meta); |
| 1159 | 1172 | ||
| 1160 | std::string expr = GenerateTexture(operation, "textureLod"); | 1173 | std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); |
| 1161 | if (meta->sampler.IsShadow()) { | 1174 | if (meta->sampler.IsShadow()) { |
| 1162 | expr = "vec4(" + expr + ')'; | 1175 | expr = "vec4(" + expr + ')'; |
| 1163 | } | 1176 | } |
| @@ -1168,7 +1181,8 @@ private: | |||
| 1168 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1181 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1169 | ASSERT(meta); | 1182 | ASSERT(meta); |
| 1170 | 1183 | ||
| 1171 | return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + | 1184 | const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; |
| 1185 | return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + | ||
| 1172 | GetSwizzle(meta->element); | 1186 | GetSwizzle(meta->element); |
| 1173 | } | 1187 | } |
| 1174 | 1188 | ||
| @@ -1197,8 +1211,8 @@ private: | |||
| 1197 | ASSERT(meta); | 1211 | ASSERT(meta); |
| 1198 | 1212 | ||
| 1199 | if (meta->element < 2) { | 1213 | if (meta->element < 2) { |
| 1200 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + | 1214 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + |
| 1201 | GetSwizzle(meta->element) + "))"; | 1215 | " * vec2(256))" + GetSwizzle(meta->element) + "))"; |
| 1202 | } | 1216 | } |
| 1203 | return "0"; | 1217 | return "0"; |
| 1204 | } | 1218 | } |
| @@ -1224,9 +1238,9 @@ private: | |||
| 1224 | else if (next < count) | 1238 | else if (next < count) |
| 1225 | expr += ", "; | 1239 | expr += ", "; |
| 1226 | } | 1240 | } |
| 1227 | for (std::size_t i = 0; i < meta->extras.size(); ++i) { | 1241 | if (meta->lod) { |
| 1228 | expr += ", "; | 1242 | expr += ", "; |
| 1229 | expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); | 1243 | expr += CastOperand(Visit(meta->lod), Type::Int); |
| 1230 | } | 1244 | } |
| 1231 | expr += ')'; | 1245 | expr += ')'; |
| 1232 | 1246 | ||
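A standalone sketch of the typed-extras convention introduced above, using simplified types rather than the decompiler's Node machinery: null operands are skipped, Int operands are wrapped in ftoi() unless inlined as immediates, and Float operands are appended untouched.

#include <optional>
#include <string>
#include <utility>
#include <vector>

enum class ExtraType { Float, Int };

// Appends extra operands to an already-open call such as "textureLod(sampler_0, coords"
// and closes it, mirroring how GenerateTexture walks its {type, node} pairs.
std::string AppendExtras(std::string expr,
                         const std::vector<std::pair<ExtraType, std::optional<std::string>>>& extras) {
    for (const auto& [type, operand] : extras) {
        if (!operand) {
            continue; // e.g. a TEX operation without a bias node
        }
        expr += ", ";
        expr += type == ExtraType::Int ? "ftoi(" + *operand + ')' : *operand;
    }
    return expr + ')';
}

// AppendExtras("textureLod(sampler_0, coords", {{ExtraType::Float, "lod_value"}})
// yields "textureLod(sampler_0, coords, lod_value)".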
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 219f08053..9419326a3 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp | |||
| @@ -461,7 +461,7 @@ void OpenGLState::ApplyTextures() const { | |||
| 461 | 461 | ||
| 462 | if (has_delta) { | 462 | if (has_delta) { |
| 463 | glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), | 463 | glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), |
| 464 | textures.data()); | 464 | textures.data() + first); |
| 465 | } | 465 | } |
| 466 | } | 466 | } |
| 467 | 467 | ||
| @@ -482,7 +482,7 @@ void OpenGLState::ApplySamplers() const { | |||
| 482 | } | 482 | } |
| 483 | if (has_delta) { | 483 | if (has_delta) { |
| 484 | glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), | 484 | glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1), |
| 485 | samplers.data()); | 485 | samplers.data() + first); |
| 486 | } | 486 | } |
| 487 | } | 487 | } |
| 488 | 488 | ||
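The "+ first" added in both hunks is the substance of this change: glBindTextures and glBindSamplers bind a contiguous range of units starting at `first`, so the source pointer has to start at the same index or every binding in the range is shifted. A small illustration with made-up values:

#include <array>
#include <cstddef>
#include <glad/glad.h>

// If only units 3..5 changed, first = 3 and last = 5; reading from textures.data()
// would hand the handles meant for units 0..2 to units 3..5 instead.
void BindChangedRange(const std::array<GLuint, 8>& textures, std::size_t first,
                      std::size_t last) {
    glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
                   textures.data() + first);
}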
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e60b2eb44..8b510b6ae 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 244 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); | 244 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); |
| 245 | } | 245 | } |
| 246 | 246 | ||
| 247 | void RendererOpenGL::AddTelemetryFields() { | ||
| 248 | const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; | ||
| 249 | const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; | ||
| 250 | const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; | ||
| 251 | |||
| 252 | LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); | ||
| 253 | LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); | ||
| 254 | LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); | ||
| 255 | |||
| 256 | auto& telemetry_session = system.TelemetrySession(); | ||
| 257 | telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); | ||
| 258 | telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); | ||
| 259 | telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); | ||
| 260 | } | ||
| 261 | |||
| 247 | void RendererOpenGL::CreateRasterizer() { | 262 | void RendererOpenGL::CreateRasterizer() { |
| 248 | if (rasterizer) { | 263 | if (rasterizer) { |
| 249 | return; | 264 | return; |
| @@ -466,17 +481,7 @@ bool RendererOpenGL::Init() { | |||
| 466 | glDebugMessageCallback(DebugHandler, nullptr); | 481 | glDebugMessageCallback(DebugHandler, nullptr); |
| 467 | } | 482 | } |
| 468 | 483 | ||
| 469 | const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; | 484 | AddTelemetryFields(); |
| 470 | const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; | ||
| 471 | const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; | ||
| 472 | |||
| 473 | LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); | ||
| 474 | LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); | ||
| 475 | LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); | ||
| 476 | |||
| 477 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); | ||
| 478 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); | ||
| 479 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); | ||
| 480 | 485 | ||
| 481 | if (!GLAD_GL_VERSION_4_3) { | 486 | if (!GLAD_GL_VERSION_4_3) { |
| 482 | return false; | 487 | return false; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c168fa89e..6cbf9d2cb 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -60,6 +60,7 @@ public: | |||
| 60 | 60 | ||
| 61 | private: | 61 | private: |
| 62 | void InitOpenGLObjects(); | 62 | void InitOpenGLObjects(); |
| 63 | void AddTelemetryFields(); | ||
| 63 | void CreateRasterizer(); | 64 | void CreateRasterizer(); |
| 64 | 65 | ||
| 65 | void ConfigureFramebufferTexture(TextureInfo& texture, | 66 | void ConfigureFramebufferTexture(TextureInfo& texture, |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp new file mode 100644 index 000000000..34bf26ff2 --- /dev/null +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -0,0 +1,483 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | #include "video_core/surface.h" | ||
| 13 | |||
| 14 | namespace Vulkan::MaxwellToVK { | ||
| 15 | |||
| 16 | namespace Sampler { | ||
| 17 | |||
| 18 | vk::Filter Filter(Tegra::Texture::TextureFilter filter) { | ||
| 19 | switch (filter) { | ||
| 20 | case Tegra::Texture::TextureFilter::Linear: | ||
| 21 | return vk::Filter::eLinear; | ||
| 22 | case Tegra::Texture::TextureFilter::Nearest: | ||
| 23 | return vk::Filter::eNearest; | ||
| 24 | } | ||
| 25 | UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); | ||
| 26 | return {}; | ||
| 27 | } | ||
| 28 | |||
| 29 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { | ||
| 30 | switch (mipmap_filter) { | ||
| 31 | case Tegra::Texture::TextureMipmapFilter::None: | ||
| 32 | // TODO(Rodrigo): None seems to map to OpenGL's mag and min filters without mipmapping | ||
| 33 | // (e.g. GL_NEAREST and GL_LINEAR). Vulkan has no equivalent; find out whether we have to | ||
| 34 | // use an image view with a single mipmap level to emulate this. | ||
| 35 | return vk::SamplerMipmapMode::eLinear; | ||
| 36 | case Tegra::Texture::TextureMipmapFilter::Linear: | ||
| 37 | return vk::SamplerMipmapMode::eLinear; | ||
| 38 | case Tegra::Texture::TextureMipmapFilter::Nearest: | ||
| 39 | return vk::SamplerMipmapMode::eNearest; | ||
| 40 | } | ||
| 41 | UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); | ||
| 42 | return {}; | ||
| 43 | } | ||
| 44 | |||
| 45 | vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { | ||
| 46 | switch (wrap_mode) { | ||
| 47 | case Tegra::Texture::WrapMode::Wrap: | ||
| 48 | return vk::SamplerAddressMode::eRepeat; | ||
| 49 | case Tegra::Texture::WrapMode::Mirror: | ||
| 50 | return vk::SamplerAddressMode::eMirroredRepeat; | ||
| 51 | case Tegra::Texture::WrapMode::ClampToEdge: | ||
| 52 | return vk::SamplerAddressMode::eClampToEdge; | ||
| 53 | case Tegra::Texture::WrapMode::Border: | ||
| 54 | return vk::SamplerAddressMode::eClampToBorder; | ||
| 55 | case Tegra::Texture::WrapMode::ClampOGL: | ||
| 56 | // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1. To implement GL_CLAMP, we can use | ||
| 57 | // eClampToBorder to get the border color of the texture, and then sample the edge to | ||
| 58 | // manually mix them. However, the shader part of this is not yet implemented. | ||
| 59 | return vk::SamplerAddressMode::eClampToBorder; | ||
| 60 | case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: | ||
| 61 | return vk::SamplerAddressMode::eMirrorClampToEdge; | ||
| 62 | case Tegra::Texture::WrapMode::MirrorOnceBorder: | ||
| 63 | UNIMPLEMENTED(); | ||
| 64 | return vk::SamplerAddressMode::eMirrorClampToEdge; | ||
| 65 | } | ||
| 66 | UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); | ||
| 67 | return {}; | ||
| 68 | } | ||
| 69 | |||
| 70 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { | ||
| 71 | switch (depth_compare_func) { | ||
| 72 | case Tegra::Texture::DepthCompareFunc::Never: | ||
| 73 | return vk::CompareOp::eNever; | ||
| 74 | case Tegra::Texture::DepthCompareFunc::Less: | ||
| 75 | return vk::CompareOp::eLess; | ||
| 76 | case Tegra::Texture::DepthCompareFunc::LessEqual: | ||
| 77 | return vk::CompareOp::eLessOrEqual; | ||
| 78 | case Tegra::Texture::DepthCompareFunc::Equal: | ||
| 79 | return vk::CompareOp::eEqual; | ||
| 80 | case Tegra::Texture::DepthCompareFunc::NotEqual: | ||
| 81 | return vk::CompareOp::eNotEqual; | ||
| 82 | case Tegra::Texture::DepthCompareFunc::Greater: | ||
| 83 | return vk::CompareOp::eGreater; | ||
| 84 | case Tegra::Texture::DepthCompareFunc::GreaterEqual: | ||
| 85 | return vk::CompareOp::eGreaterOrEqual; | ||
| 86 | case Tegra::Texture::DepthCompareFunc::Always: | ||
| 87 | return vk::CompareOp::eAlways; | ||
| 88 | } | ||
| 89 | UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", | ||
| 90 | static_cast<u32>(depth_compare_func)); | ||
| 91 | return {}; | ||
| 92 | } | ||
| 93 | |||
| 94 | } // namespace Sampler | ||
| 95 | |||
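These sampler translators are presumably consumed when building a VkSampler from a guest TSC entry. A hedged sketch of such a call site; the `tsc` field names are assumptions about the descriptor layout, not verified against the real TSCEntry:

#include "video_core/renderer_vulkan/maxwell_to_vk.h"

vk::SamplerCreateInfo MakeSamplerCreateInfo(const Tegra::Texture::TSCEntry& tsc) {
    namespace Sampler = Vulkan::MaxwellToVK::Sampler;
    vk::SamplerCreateInfo ci;
    ci.magFilter = Sampler::Filter(tsc.mag_filter);
    ci.minFilter = Sampler::Filter(tsc.min_filter);
    ci.mipmapMode = Sampler::MipmapMode(tsc.mipmap_filter);
    ci.addressModeU = Sampler::WrapMode(tsc.wrap_u);
    ci.addressModeV = Sampler::WrapMode(tsc.wrap_v);
    ci.compareOp = Sampler::DepthCompareFunction(tsc.depth_compare_func);
    return ci;
}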
| 96 | struct FormatTuple { | ||
| 97 | vk::Format format; ///< Vulkan format | ||
| 98 | ComponentType component_type; ///< Abstracted component type | ||
| 99 | bool attachable; ///< True when this format can be used as an attachment | ||
| 100 | }; | ||
| 101 | |||
| 102 | static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ | ||
| 103 | {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U | ||
| 104 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S | ||
| 105 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI | ||
| 106 | {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U | ||
| 107 | {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U | ||
| 108 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U | ||
| 109 | {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U | ||
| 110 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI | ||
| 111 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F | ||
| 112 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U | ||
| 113 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI | ||
| 114 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F | ||
| 115 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI | ||
| 116 | {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1 | ||
| 117 | {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23 | ||
| 118 | {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45 | ||
| 119 | {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1 | ||
| 120 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM | ||
| 121 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM | ||
| 122 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U | ||
| 123 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16 | ||
| 124 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16 | ||
| 125 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4 | ||
| 126 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8 | ||
| 127 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F | ||
| 128 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F | ||
| 129 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F | ||
| 130 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F | ||
| 131 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U | ||
| 132 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S | ||
| 133 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI | ||
| 134 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I | ||
| 135 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16 | ||
| 136 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F | ||
| 137 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI | ||
| 138 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I | ||
| 139 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S | ||
| 140 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F | ||
| 141 | {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB | ||
| 142 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U | ||
| 143 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S | ||
| 144 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI | ||
| 145 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI | ||
| 146 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8 | ||
| 147 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5 | ||
| 148 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4 | ||
| 149 | |||
| 150 | // Compressed sRGB formats | ||
| 151 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB | ||
| 152 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB | ||
| 153 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB | ||
| 154 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB | ||
| 155 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB | ||
| 156 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB | ||
| 157 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB | ||
| 158 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB | ||
| 159 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB | ||
| 160 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5 | ||
| 161 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB | ||
| 162 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8 | ||
| 163 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB | ||
| 164 | |||
| 165 | // Depth formats | ||
| 166 | {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F | ||
| 167 | {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16 | ||
| 168 | |||
| 169 | // DepthStencil formats | ||
| 170 | {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8 | ||
| 171 | {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated) | ||
| 172 | {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8 | ||
| 173 | }}; | ||
| 174 | |||
| 175 | static constexpr bool IsZetaFormat(PixelFormat pixel_format) { | ||
| 176 | return pixel_format >= PixelFormat::MaxColorFormat && | ||
| 177 | pixel_format < PixelFormat::MaxDepthStencilFormat; | ||
| 178 | } | ||
| 179 | |||
| 180 | std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, | ||
| 181 | PixelFormat pixel_format, ComponentType component_type) { | ||
| 182 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | ||
| 183 | |||
| 184 | const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)]; | ||
| 185 | UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined, | ||
| 186 | "Unimplemented texture format with pixel format={} and component type={}", | ||
| 187 | static_cast<u32>(pixel_format), static_cast<u32>(component_type)); | ||
| 188 | ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch"); | ||
| 189 | |||
| 190 | auto usage = vk::FormatFeatureFlagBits::eSampledImage | | ||
| 191 | vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc; | ||
| 192 | if (tuple.attachable) { | ||
| 193 | usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment | ||
| 194 | : vk::FormatFeatureFlagBits::eColorAttachment; | ||
| 195 | } | ||
| 196 | return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; | ||
| 197 | } | ||
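A hypothetical consumer of SurfaceFormat; `device`, `params`, and the choice of FormatType::Optimal are illustrative stand-ins for whatever a future Vulkan texture cache passes, not existing code:

// Illustrative only: assumes optimal tiling is used for sampled images.
vk::Format PickFormat(const VKDevice& device, const SurfaceParams& params) {
    const auto [vk_format, attachable] = SurfaceFormat(
        device, FormatType::Optimal, params.pixel_format, params.component_type);
    if (!attachable) {
        // Still usable for sampling and transfers, just not as a render target.
    }
    return vk_format;
}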
| 198 | |||
| 199 | vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) { | ||
| 200 | switch (stage) { | ||
| 201 | case Maxwell::ShaderStage::Vertex: | ||
| 202 | return vk::ShaderStageFlagBits::eVertex; | ||
| 203 | case Maxwell::ShaderStage::TesselationControl: | ||
| 204 | return vk::ShaderStageFlagBits::eTessellationControl; | ||
| 205 | case Maxwell::ShaderStage::TesselationEval: | ||
| 206 | return vk::ShaderStageFlagBits::eTessellationEvaluation; | ||
| 207 | case Maxwell::ShaderStage::Geometry: | ||
| 208 | return vk::ShaderStageFlagBits::eGeometry; | ||
| 209 | case Maxwell::ShaderStage::Fragment: | ||
| 210 | return vk::ShaderStageFlagBits::eFragment; | ||
| 211 | } | ||
| 212 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); | ||
| 213 | return {}; | ||
| 214 | } | ||
| 215 | |||
| 216 | vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) { | ||
| 217 | switch (topology) { | ||
| 218 | case Maxwell::PrimitiveTopology::Points: | ||
| 219 | return vk::PrimitiveTopology::ePointList; | ||
| 220 | case Maxwell::PrimitiveTopology::Lines: | ||
| 221 | return vk::PrimitiveTopology::eLineList; | ||
| 222 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 223 | return vk::PrimitiveTopology::eLineStrip; | ||
| 224 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 225 | return vk::PrimitiveTopology::eTriangleList; | ||
| 226 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 227 | return vk::PrimitiveTopology::eTriangleStrip; | ||
| 228 | } | ||
| 229 | UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); | ||
| 230 | return {}; | ||
| 231 | } | ||
| 232 | |||
| 233 | vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { | ||
| 234 | switch (type) { | ||
| 235 | case Maxwell::VertexAttribute::Type::SignedNorm: | ||
| 236 | break; | ||
| 237 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | ||
| 238 | switch (size) { | ||
| 239 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 240 | return vk::Format::eR8G8B8A8Unorm; | ||
| 241 | default: | ||
| 242 | break; | ||
| 243 | } | ||
| 244 | break; | ||
| 245 | case Maxwell::VertexAttribute::Type::SignedInt: | ||
| 246 | break; | ||
| 247 | case Maxwell::VertexAttribute::Type::UnsignedInt: | ||
| 248 | switch (size) { | ||
| 249 | case Maxwell::VertexAttribute::Size::Size_32: | ||
| 250 | return vk::Format::eR32Uint; | ||
| 251 | default: | ||
| 252 | break; | ||
| 253 | } | ||
| 254 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 255 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 256 | break; | ||
| 257 | case Maxwell::VertexAttribute::Type::Float: | ||
| 258 | switch (size) { | ||
| 259 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | ||
| 260 | return vk::Format::eR32G32B32A32Sfloat; | ||
| 261 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | ||
| 262 | return vk::Format::eR32G32B32Sfloat; | ||
| 263 | case Maxwell::VertexAttribute::Size::Size_32_32: | ||
| 264 | return vk::Format::eR32G32Sfloat; | ||
| 265 | case Maxwell::VertexAttribute::Size::Size_32: | ||
| 266 | return vk::Format::eR32Sfloat; | ||
| 267 | default: | ||
| 268 | break; | ||
| 269 | } | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type), | ||
| 273 | static_cast<u32>(size)); | ||
| 274 | return {}; | ||
| 275 | } | ||
| 276 | |||
| 277 | vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { | ||
| 278 | switch (comparison) { | ||
| 279 | case Maxwell::ComparisonOp::Never: | ||
| 280 | case Maxwell::ComparisonOp::NeverOld: | ||
| 281 | return vk::CompareOp::eNever; | ||
| 282 | case Maxwell::ComparisonOp::Less: | ||
| 283 | case Maxwell::ComparisonOp::LessOld: | ||
| 284 | return vk::CompareOp::eLess; | ||
| 285 | case Maxwell::ComparisonOp::Equal: | ||
| 286 | case Maxwell::ComparisonOp::EqualOld: | ||
| 287 | return vk::CompareOp::eEqual; | ||
| 288 | case Maxwell::ComparisonOp::LessEqual: | ||
| 289 | case Maxwell::ComparisonOp::LessEqualOld: | ||
| 290 | return vk::CompareOp::eLessOrEqual; | ||
| 291 | case Maxwell::ComparisonOp::Greater: | ||
| 292 | case Maxwell::ComparisonOp::GreaterOld: | ||
| 293 | return vk::CompareOp::eGreater; | ||
| 294 | case Maxwell::ComparisonOp::NotEqual: | ||
| 295 | case Maxwell::ComparisonOp::NotEqualOld: | ||
| 296 | return vk::CompareOp::eNotEqual; | ||
| 297 | case Maxwell::ComparisonOp::GreaterEqual: | ||
| 298 | case Maxwell::ComparisonOp::GreaterEqualOld: | ||
| 299 | return vk::CompareOp::eGreaterOrEqual; | ||
| 300 | case Maxwell::ComparisonOp::Always: | ||
| 301 | case Maxwell::ComparisonOp::AlwaysOld: | ||
| 302 | return vk::CompareOp::eAlways; | ||
| 303 | } | ||
| 304 | UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); | ||
| 305 | return {}; | ||
| 306 | } | ||
| 307 | |||
| 308 | vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) { | ||
| 309 | switch (index_format) { | ||
| 310 | case Maxwell::IndexFormat::UnsignedByte: | ||
| 311 | UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format"); | ||
| 312 | return vk::IndexType::eUint16; | ||
| 313 | case Maxwell::IndexFormat::UnsignedShort: | ||
| 314 | return vk::IndexType::eUint16; | ||
| 315 | case Maxwell::IndexFormat::UnsignedInt: | ||
| 316 | return vk::IndexType::eUint32; | ||
| 317 | } | ||
| 318 | UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); | ||
| 319 | return {}; | ||
| 320 | } | ||
| 321 | |||
| 322 | vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { | ||
| 323 | switch (stencil_op) { | ||
| 324 | case Maxwell::StencilOp::Keep: | ||
| 325 | case Maxwell::StencilOp::KeepOGL: | ||
| 326 | return vk::StencilOp::eKeep; | ||
| 327 | case Maxwell::StencilOp::Zero: | ||
| 328 | case Maxwell::StencilOp::ZeroOGL: | ||
| 329 | return vk::StencilOp::eZero; | ||
| 330 | case Maxwell::StencilOp::Replace: | ||
| 331 | case Maxwell::StencilOp::ReplaceOGL: | ||
| 332 | return vk::StencilOp::eReplace; | ||
| 333 | case Maxwell::StencilOp::Incr: | ||
| 334 | case Maxwell::StencilOp::IncrOGL: | ||
| 335 | return vk::StencilOp::eIncrementAndClamp; | ||
| 336 | case Maxwell::StencilOp::Decr: | ||
| 337 | case Maxwell::StencilOp::DecrOGL: | ||
| 338 | return vk::StencilOp::eDecrementAndClamp; | ||
| 339 | case Maxwell::StencilOp::Invert: | ||
| 340 | case Maxwell::StencilOp::InvertOGL: | ||
| 341 | return vk::StencilOp::eInvert; | ||
| 342 | case Maxwell::StencilOp::IncrWrap: | ||
| 343 | case Maxwell::StencilOp::IncrWrapOGL: | ||
| 344 | return vk::StencilOp::eIncrementAndWrap; | ||
| 345 | case Maxwell::StencilOp::DecrWrap: | ||
| 346 | case Maxwell::StencilOp::DecrWrapOGL: | ||
| 347 | return vk::StencilOp::eDecrementAndWrap; | ||
| 348 | } | ||
| 349 | UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); | ||
| 350 | return {}; | ||
| 351 | } | ||
| 352 | |||
| 353 | vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { | ||
| 354 | switch (equation) { | ||
| 355 | case Maxwell::Blend::Equation::Add: | ||
| 356 | case Maxwell::Blend::Equation::AddGL: | ||
| 357 | return vk::BlendOp::eAdd; | ||
| 358 | case Maxwell::Blend::Equation::Subtract: | ||
| 359 | case Maxwell::Blend::Equation::SubtractGL: | ||
| 360 | return vk::BlendOp::eSubtract; | ||
| 361 | case Maxwell::Blend::Equation::ReverseSubtract: | ||
| 362 | case Maxwell::Blend::Equation::ReverseSubtractGL: | ||
| 363 | return vk::BlendOp::eReverseSubtract; | ||
| 364 | case Maxwell::Blend::Equation::Min: | ||
| 365 | case Maxwell::Blend::Equation::MinGL: | ||
| 366 | return vk::BlendOp::eMin; | ||
| 367 | case Maxwell::Blend::Equation::Max: | ||
| 368 | case Maxwell::Blend::Equation::MaxGL: | ||
| 369 | return vk::BlendOp::eMax; | ||
| 370 | } | ||
| 371 | UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); | ||
| 372 | return {}; | ||
| 373 | } | ||
| 374 | |||
| 375 | vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { | ||
| 376 | switch (factor) { | ||
| 377 | case Maxwell::Blend::Factor::Zero: | ||
| 378 | case Maxwell::Blend::Factor::ZeroGL: | ||
| 379 | return vk::BlendFactor::eZero; | ||
| 380 | case Maxwell::Blend::Factor::One: | ||
| 381 | case Maxwell::Blend::Factor::OneGL: | ||
| 382 | return vk::BlendFactor::eOne; | ||
| 383 | case Maxwell::Blend::Factor::SourceColor: | ||
| 384 | case Maxwell::Blend::Factor::SourceColorGL: | ||
| 385 | return vk::BlendFactor::eSrcColor; | ||
| 386 | case Maxwell::Blend::Factor::OneMinusSourceColor: | ||
| 387 | case Maxwell::Blend::Factor::OneMinusSourceColorGL: | ||
| 388 | return vk::BlendFactor::eOneMinusSrcColor; | ||
| 389 | case Maxwell::Blend::Factor::SourceAlpha: | ||
| 390 | case Maxwell::Blend::Factor::SourceAlphaGL: | ||
| 391 | return vk::BlendFactor::eSrcAlpha; | ||
| 392 | case Maxwell::Blend::Factor::OneMinusSourceAlpha: | ||
| 393 | case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: | ||
| 394 | return vk::BlendFactor::eOneMinusSrcAlpha; | ||
| 395 | case Maxwell::Blend::Factor::DestAlpha: | ||
| 396 | case Maxwell::Blend::Factor::DestAlphaGL: | ||
| 397 | return vk::BlendFactor::eDstAlpha; | ||
| 398 | case Maxwell::Blend::Factor::OneMinusDestAlpha: | ||
| 399 | case Maxwell::Blend::Factor::OneMinusDestAlphaGL: | ||
| 400 | return vk::BlendFactor::eOneMinusDstAlpha; | ||
| 401 | case Maxwell::Blend::Factor::DestColor: | ||
| 402 | case Maxwell::Blend::Factor::DestColorGL: | ||
| 403 | return vk::BlendFactor::eDstColor; | ||
| 404 | case Maxwell::Blend::Factor::OneMinusDestColor: | ||
| 405 | case Maxwell::Blend::Factor::OneMinusDestColorGL: | ||
| 406 | return vk::BlendFactor::eOneMinusDstColor; | ||
| 407 | case Maxwell::Blend::Factor::SourceAlphaSaturate: | ||
| 408 | case Maxwell::Blend::Factor::SourceAlphaSaturateGL: | ||
| 409 | return vk::BlendFactor::eSrcAlphaSaturate; | ||
| 410 | case Maxwell::Blend::Factor::Source1Color: | ||
| 411 | case Maxwell::Blend::Factor::Source1ColorGL: | ||
| 412 | return vk::BlendFactor::eSrc1Color; | ||
| 413 | case Maxwell::Blend::Factor::OneMinusSource1Color: | ||
| 414 | case Maxwell::Blend::Factor::OneMinusSource1ColorGL: | ||
| 415 | return vk::BlendFactor::eOneMinusSrc1Color; | ||
| 416 | case Maxwell::Blend::Factor::Source1Alpha: | ||
| 417 | case Maxwell::Blend::Factor::Source1AlphaGL: | ||
| 418 | return vk::BlendFactor::eSrc1Alpha; | ||
| 419 | case Maxwell::Blend::Factor::OneMinusSource1Alpha: | ||
| 420 | case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: | ||
| 421 | return vk::BlendFactor::eOneMinusSrc1Alpha; | ||
| 422 | case Maxwell::Blend::Factor::ConstantColor: | ||
| 423 | case Maxwell::Blend::Factor::ConstantColorGL: | ||
| 424 | return vk::BlendFactor::eConstantColor; | ||
| 425 | case Maxwell::Blend::Factor::OneMinusConstantColor: | ||
| 426 | case Maxwell::Blend::Factor::OneMinusConstantColorGL: | ||
| 427 | return vk::BlendFactor::eOneMinusConstantColor; | ||
| 428 | case Maxwell::Blend::Factor::ConstantAlpha: | ||
| 429 | case Maxwell::Blend::Factor::ConstantAlphaGL: | ||
| 430 | return vk::BlendFactor::eConstantAlpha; | ||
| 431 | case Maxwell::Blend::Factor::OneMinusConstantAlpha: | ||
| 432 | case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: | ||
| 433 | return vk::BlendFactor::eOneMinusConstantAlpha; | ||
| 434 | } | ||
| 435 | UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); | ||
| 436 | return {}; | ||
| 437 | } | ||
| 438 | |||
| 439 | vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { | ||
| 440 | switch (front_face) { | ||
| 441 | case Maxwell::Cull::FrontFace::ClockWise: | ||
| 442 | return vk::FrontFace::eClockwise; | ||
| 443 | case Maxwell::Cull::FrontFace::CounterClockWise: | ||
| 444 | return vk::FrontFace::eCounterClockwise; | ||
| 445 | } | ||
| 446 | UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); | ||
| 447 | return {}; | ||
| 448 | } | ||
| 449 | |||
| 450 | vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { | ||
| 451 | switch (cull_face) { | ||
| 452 | case Maxwell::Cull::CullFace::Front: | ||
| 453 | return vk::CullModeFlagBits::eFront; | ||
| 454 | case Maxwell::Cull::CullFace::Back: | ||
| 455 | return vk::CullModeFlagBits::eBack; | ||
| 456 | case Maxwell::Cull::CullFace::FrontAndBack: | ||
| 457 | return vk::CullModeFlagBits::eFrontAndBack; | ||
| 458 | } | ||
| 459 | UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); | ||
| 460 | return {}; | ||
| 461 | } | ||
| 462 | |||
| 463 | vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { | ||
| 464 | switch (swizzle) { | ||
| 465 | case Tegra::Texture::SwizzleSource::Zero: | ||
| 466 | return vk::ComponentSwizzle::eZero; | ||
| 467 | case Tegra::Texture::SwizzleSource::R: | ||
| 468 | return vk::ComponentSwizzle::eR; | ||
| 469 | case Tegra::Texture::SwizzleSource::G: | ||
| 470 | return vk::ComponentSwizzle::eG; | ||
| 471 | case Tegra::Texture::SwizzleSource::B: | ||
| 472 | return vk::ComponentSwizzle::eB; | ||
| 473 | case Tegra::Texture::SwizzleSource::A: | ||
| 474 | return vk::ComponentSwizzle::eA; | ||
| 475 | case Tegra::Texture::SwizzleSource::OneInt: | ||
| 476 | case Tegra::Texture::SwizzleSource::OneFloat: | ||
| 477 | return vk::ComponentSwizzle::eOne; | ||
| 478 | } | ||
| 479 | UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); | ||
| 480 | return {}; | ||
| 481 | } | ||
| 482 | |||
| 483 | } // namespace Vulkan::MaxwellToVK | ||
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h new file mode 100644 index 000000000..4cadc0721 --- /dev/null +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | #include "video_core/surface.h" | ||
| 13 | #include "video_core/textures/texture.h" | ||
| 14 | |||
| 15 | namespace Vulkan::MaxwellToVK { | ||
| 16 | |||
| 17 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 18 | using PixelFormat = VideoCore::Surface::PixelFormat; | ||
| 19 | using ComponentType = VideoCore::Surface::ComponentType; | ||
| 20 | |||
| 21 | namespace Sampler { | ||
| 22 | |||
| 23 | vk::Filter Filter(Tegra::Texture::TextureFilter filter); | ||
| 24 | |||
| 25 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); | ||
| 26 | |||
| 27 | vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode); | ||
| 28 | |||
| 29 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); | ||
| 30 | |||
| 31 | } // namespace Sampler | ||
| 32 | |||
| 33 | std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, | ||
| 34 | PixelFormat pixel_format, ComponentType component_type); | ||
| 35 | |||
| 36 | vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage); | ||
| 37 | |||
| 38 | vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology); | ||
| 39 | |||
| 40 | vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); | ||
| 41 | |||
| 42 | vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison); | ||
| 43 | |||
| 44 | vk::IndexType IndexFormat(Maxwell::IndexFormat index_format); | ||
| 45 | |||
| 46 | vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op); | ||
| 47 | |||
| 48 | vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation); | ||
| 49 | |||
| 50 | vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); | ||
| 51 | |||
| 52 | vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face); | ||
| 53 | |||
| 54 | vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face); | ||
| 55 | |||
| 56 | vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); | ||
| 57 | |||
| 58 | } // namespace Vulkan::MaxwellToVK | ||
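On the pipeline side, the vertex translator would presumably feed vk::VertexInputAttributeDescription. A hedged sketch; `attrib`, `location`, and `binding` are illustrative, and only the VertexFormat call reflects the interface declared above:

#include "video_core/renderer_vulkan/maxwell_to_vk.h"

vk::VertexInputAttributeDescription MakeVertexAttribute(
    const Tegra::Engines::Maxwell3D::Regs::VertexAttribute& attrib, u32 location, u32 binding) {
    vk::VertexInputAttributeDescription desc;
    desc.location = location;
    desc.binding = binding;
    desc.format = Vulkan::MaxwellToVK::VertexFormat(attrib.type, attrib.size);
    desc.offset = attrib.offset;
    return desc;
}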
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 18b7b94a1..4a33a6c84 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <tuple> | 8 | #include <tuple> |
| 9 | 9 | ||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "core/core.h" | 11 | #include "common/assert.h" |
| 12 | #include "core/memory.h" | 12 | #include "core/memory.h" |
| 13 | #include "video_core/renderer_vulkan/declarations.h" | 13 | #include "video_core/renderer_vulkan/declarations.h" |
| 14 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 14 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 6cbe21202..d8e916f31 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -44,9 +44,9 @@ struct CachedBufferEntry final : public RasterizerCacheObject { | |||
| 44 | 44 | ||
| 45 | class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | 45 | class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { |
| 46 | public: | 46 | public: |
| 47 | explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, VideoCore::RasterizerInterface& rasterizer, | 47 | explicit VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, |
| 48 | const VKDevice& device, VKMemoryManager& memory_manager, | 48 | VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, |
| 49 | VKScheduler& scheduler, u64 size); | 49 | VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size); |
| 50 | ~VKBufferCache(); | 50 | ~VKBufferCache(); |
| 51 | 51 | ||
| 52 | /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | 52 | /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been |
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 78a4e5f0e..00242ecbe 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag | |||
| 122 | FormatType format_type) const { | 122 | FormatType format_type) const { |
| 123 | const auto it = format_properties.find(wanted_format); | 123 | const auto it = format_properties.find(wanted_format); |
| 124 | if (it == format_properties.end()) { | 124 | if (it == format_properties.end()) { |
| 125 | LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", | 125 | LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format)); |
| 126 | static_cast<u32>(wanted_format)); | ||
| 127 | UNREACHABLE(); | 126 | UNREACHABLE(); |
| 128 | return true; | 127 | return true; |
| 129 | } | 128 | } |
| @@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( | |||
| 219 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); | 218 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); |
| 220 | }; | 219 | }; |
| 221 | AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); | 220 | AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); |
| 222 | AddFormatQuery(vk::Format::eR5G6B5UnormPack16); | 221 | AddFormatQuery(vk::Format::eB5G6R5UnormPack16); |
| 222 | AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); | ||
| 223 | AddFormatQuery(vk::Format::eR8G8B8A8Srgb); | ||
| 224 | AddFormatQuery(vk::Format::eR8Unorm); | ||
| 223 | AddFormatQuery(vk::Format::eD32Sfloat); | 225 | AddFormatQuery(vk::Format::eD32Sfloat); |
| 226 | AddFormatQuery(vk::Format::eD16Unorm); | ||
| 224 | AddFormatQuery(vk::Format::eD16UnormS8Uint); | 227 | AddFormatQuery(vk::Format::eD16UnormS8Uint); |
| 225 | AddFormatQuery(vk::Format::eD24UnormS8Uint); | 228 | AddFormatQuery(vk::Format::eD24UnormS8Uint); |
| 226 | AddFormatQuery(vk::Format::eD32SfloatS8Uint); | 229 | AddFormatQuery(vk::Format::eD32SfloatS8Uint); |
| 230 | AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); | ||
| 231 | AddFormatQuery(vk::Format::eBc2UnormBlock); | ||
| 232 | AddFormatQuery(vk::Format::eBc3UnormBlock); | ||
| 233 | AddFormatQuery(vk::Format::eBc4UnormBlock); | ||
| 227 | 234 | ||
| 228 | return format_properties; | 235 | return format_properties; |
| 229 | } | 236 | } |
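For context, these cached properties are presumably what IsFormatSupported consults. A plausible, unverified sketch of that lookup, assuming FormatType only distinguishes linear from optimal tiling; the real VKDevice::IsFormatSupported may differ:

#include <map>

#include "video_core/renderer_vulkan/declarations.h"

bool SupportsUsage(const std::map<vk::Format, vk::FormatProperties>& format_properties,
                   vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                   bool optimal_tiling) {
    const vk::FormatProperties& props = format_properties.at(wanted_format);
    const vk::FormatFeatureFlags supported =
        optimal_tiling ? props.optimalTilingFeatures : props.linearTilingFeatures;
    return (supported & wanted_usage) == wanted_usage;
}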
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 740ac3118..e4c438792 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 165 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | 165 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, |
| 166 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | 166 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, |
| 167 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | 167 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, |
| 168 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | ||
| 168 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | 169 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, |
| 169 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | 170 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, |
| 170 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | 171 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 38f01ca50..ea3c71eed 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -17,24 +17,6 @@ using Tegra::Shader::Attribute; | |||
| 17 | using Tegra::Shader::Instruction; | 17 | using Tegra::Shader::Instruction; |
| 18 | using Tegra::Shader::OpCode; | 18 | using Tegra::Shader::OpCode; |
| 19 | using Tegra::Shader::Register; | 19 | using Tegra::Shader::Register; |
| 20 | using Tegra::Shader::TextureMiscMode; | ||
| 21 | using Tegra::Shader::TextureProcessMode; | ||
| 22 | using Tegra::Shader::TextureType; | ||
| 23 | |||
| 24 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 25 | switch (texture_type) { | ||
| 26 | case TextureType::Texture1D: | ||
| 27 | return 1; | ||
| 28 | case TextureType::Texture2D: | ||
| 29 | return 2; | ||
| 30 | case TextureType::Texture3D: | ||
| 31 | case TextureType::TextureCube: | ||
| 32 | return 3; | ||
| 33 | default: | ||
| 34 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | 20 | ||
| 39 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | 21 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
| 40 | const Instruction instr = {program_code[pc]}; | 22 | const Instruction instr = {program_code[pc]}; |
| @@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 247 | } | 229 | } |
| 248 | break; | 230 | break; |
| 249 | } | 231 | } |
| 250 | case OpCode::Id::TEX: { | ||
| 251 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 252 | "AOFFI is not implemented"); | ||
| 253 | |||
| 254 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 255 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||
| 256 | } | ||
| 257 | |||
| 258 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 259 | const bool is_array = instr.tex.array != 0; | ||
| 260 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 261 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 262 | WriteTexInstructionFloat( | ||
| 263 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | ||
| 264 | break; | ||
| 265 | } | ||
| 266 | case OpCode::Id::TEXS: { | ||
| 267 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 268 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 269 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 270 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 271 | |||
| 272 | if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 273 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | ||
| 274 | } | ||
| 275 | |||
| 276 | const Node4 components = | ||
| 277 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 278 | |||
| 279 | if (instr.texs.fp32_flag) { | ||
| 280 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 281 | } else { | ||
| 282 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 283 | } | ||
| 284 | break; | ||
| 285 | } | ||
| 286 | case OpCode::Id::TLD4: { | ||
| 287 | ASSERT(instr.tld4.array == 0); | ||
| 288 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 289 | "AOFFI is not implemented"); | ||
| 290 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 291 | "NDV is not implemented"); | ||
| 292 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||
| 293 | "PTP is not implemented"); | ||
| 294 | |||
| 295 | if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 296 | LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); | ||
| 297 | } | ||
| 298 | |||
| 299 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 300 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 301 | const bool is_array = instr.tld4.array != 0; | ||
| 302 | WriteTexInstructionFloat(bb, instr, | ||
| 303 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | ||
| 304 | break; | ||
| 305 | } | ||
| 306 | case OpCode::Id::TLD4S: { | ||
| 307 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 308 | "AOFFI is not implemented"); | ||
| 309 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 310 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | ||
| 311 | } | ||
| 312 | |||
| 313 | const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 314 | const Node op_a = GetRegister(instr.gpr8); | ||
| 315 | const Node op_b = GetRegister(instr.gpr20); | ||
| 316 | |||
| 317 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 318 | std::vector<Node> coords; | ||
| 319 | if (depth_compare) { | ||
| 320 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 321 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 322 | coords.push_back(op_a); | ||
| 323 | coords.push_back(op_y); | ||
| 324 | coords.push_back(op_b); | ||
| 325 | } else { | ||
| 326 | coords.push_back(op_a); | ||
| 327 | coords.push_back(op_b); | ||
| 328 | } | ||
| 329 | std::vector<Node> extras; | ||
| 330 | extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | ||
| 331 | |||
| 332 | const auto& sampler = | ||
| 333 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | ||
| 334 | |||
| 335 | Node4 values; | ||
| 336 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 337 | auto coords_copy = coords; | ||
| 338 | MetaTexture meta{sampler, {}, {}, extras, element}; | ||
| 339 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 340 | } | ||
| 341 | |||
| 342 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 343 | break; | ||
| 344 | } | ||
| 345 | case OpCode::Id::TXQ: { | ||
| 346 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 347 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); | ||
| 348 | } | ||
| 349 | |||
| 350 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 351 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 352 | // uses. This must be fixed at a later instance. | ||
| 353 | const auto& sampler = | ||
| 354 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 355 | |||
| 356 | u32 indexer = 0; | ||
| 357 | switch (instr.txq.query_type) { | ||
| 358 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 359 | for (u32 element = 0; element < 4; ++element) { | ||
| 360 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 361 | continue; | ||
| 362 | } | ||
| 363 | MetaTexture meta{sampler, {}, {}, {}, element}; | ||
| 364 | const Node value = | ||
| 365 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||
| 366 | SetTemporal(bb, indexer++, value); | ||
| 367 | } | ||
| 368 | for (u32 i = 0; i < indexer; ++i) { | ||
| 369 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 370 | } | ||
| 371 | break; | ||
| 372 | } | ||
| 373 | default: | ||
| 374 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 375 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 376 | } | ||
| 377 | break; | ||
| 378 | } | ||
| 379 | case OpCode::Id::TMML: { | ||
| 380 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 381 | "NDV is not implemented"); | ||
| 382 | |||
| 383 | if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 384 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 385 | } | ||
| 386 | |||
| 387 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 388 | const bool is_array = instr.tmml.array != 0; | ||
| 389 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 390 | |||
| 391 | std::vector<Node> coords; | ||
| 392 | |||
| 393 | // TODO: Add coordinates for different samplers once other texture types are implemented. | ||
| 394 | switch (texture_type) { | ||
| 395 | case TextureType::Texture1D: | ||
| 396 | coords.push_back(GetRegister(instr.gpr8)); | ||
| 397 | break; | ||
| 398 | case TextureType::Texture2D: | ||
| 399 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 400 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 401 | break; | ||
| 402 | default: | ||
| 403 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 404 | |||
| 405 | // Fallback to interpreting as a 2D texture for now | ||
| 406 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 407 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 408 | texture_type = TextureType::Texture2D; | ||
| 409 | } | ||
| 410 | |||
| 411 | for (u32 element = 0; element < 2; ++element) { | ||
| 412 | auto params = coords; | ||
| 413 | MetaTexture meta{sampler, {}, {}, {}, element}; | ||
| 414 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | ||
| 415 | SetTemporal(bb, element, value); | ||
| 416 | } | ||
| 417 | for (u32 element = 0; element < 2; ++element) { | ||
| 418 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 419 | } | ||
| 420 | |||
| 421 | break; | ||
| 422 | } | ||
| 423 | case OpCode::Id::TLDS: { | ||
| 424 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 425 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 426 | |||
| 427 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 428 | "AOFFI is not implemented"); | ||
| 429 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 430 | |||
| 431 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 432 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); | ||
| 433 | } | ||
| 434 | |||
| 435 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | ||
| 436 | break; | ||
| 437 | } | ||
| 438 | default: | 232 | default: |
| 439 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | 233 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); |
| 440 | } | 234 | } |
| @@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 442 | return pc; | 236 | return pc; |
| 443 | } | 237 | } |
| 444 | 238 | ||
| 445 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | ||
| 446 | bool is_array, bool is_shadow) { | ||
| 447 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 448 | |||
| 449 | // If this sampler has already been used, return the existing mapping. | ||
| 450 | const auto itr = | ||
| 451 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 452 | [&](const Sampler& entry) { return entry.GetOffset() == offset; }); | ||
| 453 | if (itr != used_samplers.end()) { | ||
| 454 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 455 | itr->IsShadow() == is_shadow); | ||
| 456 | return *itr; | ||
| 457 | } | ||
| 458 | |||
| 459 | // Otherwise create a new mapping for this sampler | ||
| 460 | const std::size_t next_index = used_samplers.size(); | ||
| 461 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; | ||
| 462 | return *used_samplers.emplace(entry).first; | ||
| 463 | } | ||
| 464 | |||
| 465 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||
| 466 | u32 dest_elem = 0; | ||
| 467 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 468 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 469 | // Skip disabled components | ||
| 470 | continue; | ||
| 471 | } | ||
| 472 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 473 | } | ||
| 474 | // After writing values in temporals, move them to the real registers | ||
| 475 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 476 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | ||
| 481 | const Node4& components) { | ||
| 482 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 483 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 484 | |||
| 485 | u32 dest_elem = 0; | ||
| 486 | for (u32 component = 0; component < 4; ++component) { | ||
| 487 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 488 | continue; | ||
| 489 | SetTemporal(bb, dest_elem++, components[component]); | ||
| 490 | } | ||
| 491 | |||
| 492 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 493 | if (i < 2) { | ||
| 494 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 495 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | ||
| 496 | } else { | ||
| 497 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 498 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 499 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | ||
| 500 | } | ||
| 501 | } | ||
| 502 | } | ||
| 503 | |||
| 504 | void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||
| 505 | const Node4& components) { | ||
| 506 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 507 | // float instruction). | ||
| 508 | |||
| 509 | Node4 values; | ||
| 510 | u32 dest_elem = 0; | ||
| 511 | for (u32 component = 0; component < 4; ++component) { | ||
| 512 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 513 | continue; | ||
| 514 | values[dest_elem++] = components[component]; | ||
| 515 | } | ||
| 516 | if (dest_elem == 0) | ||
| 517 | return; | ||
| 518 | |||
| 519 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 520 | |||
| 521 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 522 | if (dest_elem <= 2) { | ||
| 523 | SetRegister(bb, instr.gpr0, first_value); | ||
| 524 | return; | ||
| 525 | } | ||
| 526 | |||
| 527 | SetTemporal(bb, 0, first_value); | ||
| 528 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 529 | |||
| 530 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 531 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 532 | } | ||
| 533 | |||
| 534 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||
| 535 | TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 536 | Node array, Node depth_compare, u32 bias_offset) { | ||
| 537 | const bool is_array = array; | ||
| 538 | const bool is_shadow = depth_compare; | ||
| 539 | |||
| 540 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || | ||
| 541 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | ||
| 542 | "This method is not supported."); | ||
| 543 | |||
| 544 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); | ||
| 545 | |||
| 546 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | ||
| 547 | process_mode == TextureProcessMode::LL || | ||
| 548 | process_mode == TextureProcessMode::LLA; | ||
| 549 | |||
| 550 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | ||
| 551 | // sampler2DArrayShadow and samplerCubeArrayShadow. | ||
| 552 | const bool gl_lod_supported = | ||
| 553 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || | ||
| 554 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); | ||
| 555 | |||
| 556 | const OperationCode read_method = | ||
| 557 | lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture; | ||
| 558 | |||
| 559 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); | ||
| 560 | |||
| 561 | std::vector<Node> extras; | ||
| 562 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | ||
| 563 | if (process_mode == TextureProcessMode::LZ) { | ||
| 564 | extras.push_back(Immediate(0.0f)); | ||
| 565 | } else { | ||
| 566 | // If present, lod or bias are always stored in the register indexed by the gpr20 | ||
| 567 | // field with an offset depending on the usage of the other registers | ||
| 568 | extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); | ||
| 569 | } | ||
| 570 | } | ||
| 571 | |||
| 572 | Node4 values; | ||
| 573 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 574 | auto copy_coords = coords; | ||
| 575 | MetaTexture meta{sampler, array, depth_compare, extras, element}; | ||
| 576 | values[element] = Operation(read_method, meta, std::move(copy_coords)); | ||
| 577 | } | ||
| 578 | |||
| 579 | return values; | ||
| 580 | } | ||
| 581 | |||
| 582 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||
| 583 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 584 | const bool lod_bias_enabled = | ||
| 585 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 586 | |||
| 587 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 588 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||
| 589 | // If enabled arrays index is always stored in the gpr8 field | ||
| 590 | const u64 array_register = instr.gpr8.Value(); | ||
| 591 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 592 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 593 | |||
| 594 | std::vector<Node> coords; | ||
| 595 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 596 | coords.push_back(GetRegister(coord_register + i)); | ||
| 597 | } | ||
| 598 | // 1D.DC in OpenGL the 2nd component is ignored. | ||
| 599 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | ||
| 600 | coords.push_back(Immediate(0.0f)); | ||
| 601 | } | ||
| 602 | |||
| 603 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 604 | |||
| 605 | Node dc{}; | ||
| 606 | if (depth_compare) { | ||
| 607 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 608 | // or bias are used | ||
| 609 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 610 | dc = GetRegister(depth_register); | ||
| 611 | } | ||
| 612 | |||
| 613 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); | ||
| 614 | } | ||
| 615 | |||
| 616 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||
| 617 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 618 | const bool lod_bias_enabled = | ||
| 619 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 620 | |||
| 621 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 622 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); | ||
| 623 | // If enabled arrays index is always stored in the gpr8 field | ||
| 624 | const u64 array_register = instr.gpr8.Value(); | ||
| 625 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used | ||
| 626 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 627 | const u64 last_coord_register = | ||
| 628 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 629 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 630 | : coord_register + 1; | ||
| 631 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 632 | |||
| 633 | std::vector<Node> coords; | ||
| 634 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 635 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 636 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 637 | } | ||
| 638 | |||
| 639 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 640 | |||
| 641 | Node dc{}; | ||
| 642 | if (depth_compare) { | ||
| 643 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 644 | // or bias are used | ||
| 645 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 646 | dc = GetRegister(depth_register); | ||
| 647 | } | ||
| 648 | |||
| 649 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); | ||
| 650 | } | ||
| 651 | |||
| 652 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 653 | bool is_array) { | ||
| 654 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 655 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 656 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 657 | |||
| 658 | // If enabled arrays index is always stored in the gpr8 field | ||
| 659 | const u64 array_register = instr.gpr8.Value(); | ||
| 660 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 661 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 662 | |||
| 663 | std::vector<Node> coords; | ||
| 664 | for (size_t i = 0; i < coord_count; ++i) | ||
| 665 | coords.push_back(GetRegister(coord_register + i)); | ||
| 666 | |||
| 667 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 668 | |||
| 669 | Node4 values; | ||
| 670 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 671 | auto coords_copy = coords; | ||
| 672 | MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element}; | ||
| 673 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 674 | } | ||
| 675 | |||
| 676 | return values; | ||
| 677 | } | ||
| 678 | |||
| 679 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 680 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 681 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 682 | |||
| 683 | // If enabled arrays index is always stored in the gpr8 field | ||
| 684 | const u64 array_register = instr.gpr8.Value(); | ||
| 685 | // if is array gpr20 is used | ||
| 686 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 687 | |||
| 688 | const u64 last_coord_register = | ||
| 689 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 690 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 691 | : coord_register + 1; | ||
| 692 | |||
| 693 | std::vector<Node> coords; | ||
| 694 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 695 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 696 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 697 | } | ||
| 698 | |||
| 699 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 700 | // When lod is used always is in gpr20 | ||
| 701 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||
| 702 | |||
| 703 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 704 | |||
| 705 | Node4 values; | ||
| 706 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 707 | auto coords_copy = coords; | ||
| 708 | MetaTexture meta{sampler, array, {}, {lod}, element}; | ||
| 709 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 710 | } | ||
| 711 | return values; | ||
| 712 | } | ||
| 713 | |||
| 714 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 715 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 716 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 717 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 718 | |||
| 719 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 720 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 721 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 722 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 723 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 724 | } | ||
| 725 | // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. | ||
| 726 | total_coord_count += | ||
| 727 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 728 | |||
| 729 | return {coord_count, total_coord_count}; | ||
| 730 | } | ||
| 731 | |||
| 732 | } // namespace VideoCommon::Shader | 239 | } // namespace VideoCommon::Shader |
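Note: the texture opcodes deleted from DecodeMemory above are not dropped; they move, with the MetaTexture changes described further down, into the new decode/texture.cpp that follows. A minimal sketch of how the new pass is presumably wired into the decode table (the exact registration line is not shown in this section and is an assumption):

    // decode table (assumed): texture opcodes now route to their own decoder
    {OpCode::Type::Texture, &ShaderIR::DecodeTexture},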
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp new file mode 100644 index 000000000..a99ae19bf --- /dev/null +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -0,0 +1,534 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | using Tegra::Shader::Register; | ||
| 19 | using Tegra::Shader::TextureMiscMode; | ||
| 20 | using Tegra::Shader::TextureProcessMode; | ||
| 21 | using Tegra::Shader::TextureType; | ||
| 22 | |||
| 23 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 24 | switch (texture_type) { | ||
| 25 | case TextureType::Texture1D: | ||
| 26 | return 1; | ||
| 27 | case TextureType::Texture2D: | ||
| 28 | return 2; | ||
| 29 | case TextureType::Texture3D: | ||
| 30 | case TextureType::TextureCube: | ||
| 31 | return 3; | ||
| 32 | default: | ||
| 33 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 34 | return 0; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||
| 39 | const Instruction instr = {program_code[pc]}; | ||
| 40 | const auto opcode = OpCode::Decode(instr); | ||
| 41 | |||
| 42 | switch (opcode->get().GetId()) { | ||
| 43 | case OpCode::Id::TEX: { | ||
| 44 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 45 | "AOFFI is not implemented"); | ||
| 46 | |||
| 47 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 48 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||
| 49 | } | ||
| 50 | |||
| 51 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 52 | const bool is_array = instr.tex.array != 0; | ||
| 53 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 54 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 55 | WriteTexInstructionFloat( | ||
| 56 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | ||
| 57 | break; | ||
| 58 | } | ||
| 59 | case OpCode::Id::TEXS: { | ||
| 60 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 61 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 62 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 63 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 64 | |||
| 65 | if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 66 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | ||
| 67 | } | ||
| 68 | |||
| 69 | const Node4 components = | ||
| 70 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 71 | |||
| 72 | if (instr.texs.fp32_flag) { | ||
| 73 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 74 | } else { | ||
| 75 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 76 | } | ||
| 77 | break; | ||
| 78 | } | ||
| 79 | case OpCode::Id::TLD4: { | ||
| 80 | ASSERT(instr.tld4.array == 0); | ||
| 81 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 82 | "AOFFI is not implemented"); | ||
| 83 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 84 | "NDV is not implemented"); | ||
| 85 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||
| 86 | "PTP is not implemented"); | ||
| 87 | |||
| 88 | if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 89 | LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); | ||
| 90 | } | ||
| 91 | |||
| 92 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 93 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 94 | const bool is_array = instr.tld4.array != 0; | ||
| 95 | WriteTexInstructionFloat(bb, instr, | ||
| 96 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | ||
| 97 | break; | ||
| 98 | } | ||
| 99 | case OpCode::Id::TLD4S: { | ||
| 100 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 101 | "AOFFI is not implemented"); | ||
| 102 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 103 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | ||
| 104 | } | ||
| 105 | |||
| 106 | const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 107 | const Node op_a = GetRegister(instr.gpr8); | ||
| 108 | const Node op_b = GetRegister(instr.gpr20); | ||
| 109 | |||
| 110 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 111 | std::vector<Node> coords; | ||
| 112 | if (depth_compare) { | ||
| 113 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 114 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 115 | coords.push_back(op_a); | ||
| 116 | coords.push_back(op_y); | ||
| 117 | coords.push_back(op_b); | ||
| 118 | } else { | ||
| 119 | coords.push_back(op_a); | ||
| 120 | coords.push_back(op_b); | ||
| 121 | } | ||
| 122 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | ||
| 123 | |||
| 124 | const auto& sampler = | ||
| 125 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | ||
| 126 | |||
| 127 | Node4 values; | ||
| 128 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 129 | auto coords_copy = coords; | ||
| 130 | MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; | ||
| 131 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 132 | } | ||
| 133 | |||
| 134 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 135 | break; | ||
| 136 | } | ||
| 137 | case OpCode::Id::TXQ: { | ||
| 138 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 139 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); | ||
| 140 | } | ||
| 141 | |||
| 142 | // TODO: The new commits on the texture refactor change the way samplers work. | ||
| 143 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 144 | // uses. This must be fixed at a later stage. | ||
| 145 | const auto& sampler = | ||
| 146 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 147 | |||
| 148 | u32 indexer = 0; | ||
| 149 | switch (instr.txq.query_type) { | ||
| 150 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 151 | for (u32 element = 0; element < 4; ++element) { | ||
| 152 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 153 | continue; | ||
| 154 | } | ||
| 155 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | ||
| 156 | const Node value = | ||
| 157 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||
| 158 | SetTemporal(bb, indexer++, value); | ||
| 159 | } | ||
| 160 | for (u32 i = 0; i < indexer; ++i) { | ||
| 161 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 162 | } | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | default: | ||
| 166 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 167 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 168 | } | ||
| 169 | break; | ||
| 170 | } | ||
| 171 | case OpCode::Id::TMML: { | ||
| 172 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 173 | "NDV is not implemented"); | ||
| 174 | |||
| 175 | if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 176 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 177 | } | ||
| 178 | |||
| 179 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 180 | const bool is_array = instr.tmml.array != 0; | ||
| 181 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 182 | |||
| 183 | std::vector<Node> coords; | ||
| 184 | |||
| 185 | // TODO: Add coordinates for different samplers once other texture types are implemented. | ||
| 186 | switch (texture_type) { | ||
| 187 | case TextureType::Texture1D: | ||
| 188 | coords.push_back(GetRegister(instr.gpr8)); | ||
| 189 | break; | ||
| 190 | case TextureType::Texture2D: | ||
| 191 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 192 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 193 | break; | ||
| 194 | default: | ||
| 195 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 196 | |||
| 197 | // Fallback to interpreting as a 2D texture for now | ||
| 198 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 199 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 200 | texture_type = TextureType::Texture2D; | ||
| 201 | } | ||
| 202 | |||
| 203 | for (u32 element = 0; element < 2; ++element) { | ||
| 204 | auto params = coords; | ||
| 205 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | ||
| 206 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | ||
| 207 | SetTemporal(bb, element, value); | ||
| 208 | } | ||
| 209 | for (u32 element = 0; element < 2; ++element) { | ||
| 210 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 211 | } | ||
| 212 | |||
| 213 | break; | ||
| 214 | } | ||
| 215 | case OpCode::Id::TLDS: { | ||
| 216 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 217 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 218 | |||
| 219 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 220 | "AOFFI is not implemented"); | ||
| 221 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 222 | |||
| 223 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 224 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); | ||
| 225 | } | ||
| 226 | |||
| 227 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | ||
| 228 | break; | ||
| 229 | } | ||
| 230 | default: | ||
| 231 | UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName()); | ||
| 232 | } | ||
| 233 | |||
| 234 | return pc; | ||
| 235 | } | ||
| 236 | |||
| 237 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | ||
| 238 | bool is_array, bool is_shadow) { | ||
| 239 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 240 | |||
| 241 | // If this sampler has already been used, return the existing mapping. | ||
| 242 | const auto itr = | ||
| 243 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 244 | [&](const Sampler& entry) { return entry.GetOffset() == offset; }); | ||
| 245 | if (itr != used_samplers.end()) { | ||
| 246 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 247 | itr->IsShadow() == is_shadow); | ||
| 248 | return *itr; | ||
| 249 | } | ||
| 250 | |||
| 251 | // Otherwise create a new mapping for this sampler | ||
| 252 | const std::size_t next_index = used_samplers.size(); | ||
| 253 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; | ||
| 254 | return *used_samplers.emplace(entry).first; | ||
| 255 | } | ||
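GetSampler deduplicates by the sampler's constant-buffer offset: the first use of an offset allocates the next free index, and every later use of the same offset returns the existing entry (the ASSERT guards against reusing an offset with a different type/array/shadow combination). Illustrative sketch only; instr_a and instr_b are hypothetical instructions that share one sampler slot:

    // Both calls resolve to the same used_samplers entry, so the texture is
    // bound once no matter how many operations sample from it.
    const Sampler& a = GetSampler(instr_a.sampler, TextureType::Texture2D, false, false);
    const Sampler& b = GetSampler(instr_b.sampler, TextureType::Texture2D, false, false);
    ASSERT(&a == &b);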
| 256 | |||
| 257 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||
| 258 | u32 dest_elem = 0; | ||
| 259 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 260 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 261 | // Skip disabled components | ||
| 262 | continue; | ||
| 263 | } | ||
| 264 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 265 | } | ||
| 266 | // After writing values in temporals, move them to the real registers | ||
| 267 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 268 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 269 | } | ||
| 270 | } | ||
| 271 | |||
| 272 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | ||
| 273 | const Node4& components) { | ||
| 274 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 275 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 276 | |||
| 277 | u32 dest_elem = 0; | ||
| 278 | for (u32 component = 0; component < 4; ++component) { | ||
| 279 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 280 | continue; | ||
| 281 | SetTemporal(bb, dest_elem++, components[component]); | ||
| 282 | } | ||
| 283 | |||
| 284 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 285 | if (i < 2) { | ||
| 286 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 287 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | ||
| 288 | } else { | ||
| 289 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 290 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 291 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | ||
| 292 | } | ||
| 293 | } | ||
| 294 | } | ||
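A worked example of the WriteTexsInstructionFloat write-back above, assuming three swizzle components are enabled (register numbers illustrative):

    // dest_elem == 3 after the temporal loop
    // i == 0 -> SetRegister(bb, gpr0 + 0, c0)
    // i == 1 -> SetRegister(bb, gpr0 + 1, c1)
    // i == 2 -> SetRegister(bb, gpr28 + 0, c2)   // requires HasTwoDestinations()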
| 295 | |||
| 296 | void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||
| 297 | const Node4& components) { | ||
| 298 | // TEXS.F16 destination registers are packed in pairs into two registers (just like any half | ||
| 299 | // float instruction). | ||
| 300 | |||
| 301 | Node4 values; | ||
| 302 | u32 dest_elem = 0; | ||
| 303 | for (u32 component = 0; component < 4; ++component) { | ||
| 304 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 305 | continue; | ||
| 306 | values[dest_elem++] = components[component]; | ||
| 307 | } | ||
| 308 | if (dest_elem == 0) | ||
| 309 | return; | ||
| 310 | |||
| 311 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 312 | |||
| 313 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 314 | if (dest_elem <= 2) { | ||
| 315 | SetRegister(bb, instr.gpr0, first_value); | ||
| 316 | return; | ||
| 317 | } | ||
| 318 | |||
| 319 | SetTemporal(bb, 0, first_value); | ||
| 320 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 321 | |||
| 322 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 323 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 324 | } | ||
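For the half-float variant, the enabled components are zero-padded to four and packed two per register with HPack2; the two possible outcomes under the logic above are:

    // <= 2 enabled components: gpr0  <- HPack2(c0, c1)   (single write, early return)
    //  > 2 enabled components: gpr0  <- HPack2(c0, c1)
    //                          gpr28 <- HPack2(c2, c3)   (c3 may be the Immediate(0) filler)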
| 325 | |||
| 326 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||
| 327 | TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 328 | Node array, Node depth_compare, u32 bias_offset) { | ||
| 329 | const bool is_array = array; | ||
| 330 | const bool is_shadow = depth_compare; | ||
| 331 | |||
| 332 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || | ||
| 333 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | ||
| 334 | "This method is not supported."); | ||
| 335 | |||
| 336 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); | ||
| 337 | |||
| 338 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | ||
| 339 | process_mode == TextureProcessMode::LL || | ||
| 340 | process_mode == TextureProcessMode::LLA; | ||
| 341 | |||
| 342 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | ||
| 343 | // sampler2DArrayShadow and samplerCubeArrayShadow. | ||
| 344 | const bool gl_lod_supported = | ||
| 345 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || | ||
| 346 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); | ||
| 347 | |||
| 348 | const OperationCode read_method = | ||
| 349 | (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture; | ||
| 350 | |||
| 351 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); | ||
| 352 | |||
| 353 | Node bias = {}; | ||
| 354 | Node lod = {}; | ||
| 355 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | ||
| 356 | switch (process_mode) { | ||
| 357 | case TextureProcessMode::LZ: | ||
| 358 | lod = Immediate(0.0f); | ||
| 359 | break; | ||
| 360 | case TextureProcessMode::LB: | ||
| 361 | // If present, lod or bias are always stored in the register indexed by the gpr20 | ||
| 362 | // field with an offset depending on the usage of the other registers | ||
| 363 | bias = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 364 | break; | ||
| 365 | case TextureProcessMode::LL: | ||
| 366 | lod = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 367 | break; | ||
| 368 | default: | ||
| 369 | UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); | ||
| 370 | break; | ||
| 371 | } | ||
| 372 | } | ||
| 373 | |||
| 374 | Node4 values; | ||
| 375 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 376 | auto copy_coords = coords; | ||
| 377 | MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; | ||
| 378 | values[element] = Operation(read_method, meta, std::move(copy_coords)); | ||
| 379 | } | ||
| 380 | |||
| 381 | return values; | ||
| 382 | } | ||
| 383 | |||
| 384 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||
| 385 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 386 | const bool lod_bias_enabled = | ||
| 387 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 388 | |||
| 389 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 390 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||
| 391 | // If arrays are enabled, the index is always stored in the gpr8 field | ||
| 392 | const u64 array_register = instr.gpr8.Value(); | ||
| 393 | // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used | ||
| 394 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 395 | |||
| 396 | std::vector<Node> coords; | ||
| 397 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 398 | coords.push_back(GetRegister(coord_register + i)); | ||
| 399 | } | ||
| 400 | // For 1D.DC in OpenGL, the 2nd component is ignored. | ||
| 401 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | ||
| 402 | coords.push_back(Immediate(0.0f)); | ||
| 403 | } | ||
| 404 | |||
| 405 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 406 | |||
| 407 | Node dc{}; | ||
| 408 | if (depth_compare) { | ||
| 409 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 410 | // or bias are used | ||
| 411 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 412 | dc = GetRegister(depth_register); | ||
| 413 | } | ||
| 414 | |||
| 415 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); | ||
| 416 | } | ||
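The register layout GetTexCode derives, traced for one hedged example (a 2D array TEX with LB and DC set; register numbers are relative to the instruction's gpr fields):

    // array_register = gpr8             -> array index
    // coord_register = gpr8 + 1         -> x in gpr8+1, y in gpr8+2
    // bias (bias_offset == 0 for TEX)   -> gpr20
    // depth compare (lod_bias_enabled)  -> gpr20 + 1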
| 417 | |||
| 418 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||
| 419 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 420 | const bool lod_bias_enabled = | ||
| 421 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 422 | |||
| 423 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 424 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); | ||
| 425 | // If arrays are enabled, the index is always stored in the gpr8 field | ||
| 426 | const u64 array_register = instr.gpr8.Value(); | ||
| 427 | // The first coordinate index is stored in gpr8, or (gpr8 + 1) when arrays are used | ||
| 428 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 429 | const u64 last_coord_register = | ||
| 430 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 431 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 432 | : coord_register + 1; | ||
| 433 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 434 | |||
| 435 | std::vector<Node> coords; | ||
| 436 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 437 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 438 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 439 | } | ||
| 440 | |||
| 441 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 442 | |||
| 443 | Node dc{}; | ||
| 444 | if (depth_compare) { | ||
| 445 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 446 | // or bias are used | ||
| 447 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 448 | dc = GetRegister(depth_register); | ||
| 449 | } | ||
| 450 | |||
| 451 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); | ||
| 452 | } | ||
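TEXS packs its operands more tightly than TEX, which is what the last_coord_register logic above encodes. Two hedged traces for a 2D, non-array texture:

    // No lod/bias, no depth compare:  x -> gpr8,  y -> gpr20
    // With LB (bias) and no DC:       x -> gpr8,  y -> gpr8+1,  bias -> gpr20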
| 453 | |||
| 454 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 455 | bool is_array) { | ||
| 456 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 457 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 458 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 459 | |||
| 460 | // If arrays are enabled, the index is always stored in the gpr8 field | ||
| 461 | const u64 array_register = instr.gpr8.Value(); | ||
| 462 | // The first coordinate index is gpr8, or gpr8 + 1 when arrays are used | ||
| 463 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 464 | |||
| 465 | std::vector<Node> coords; | ||
| 466 | for (size_t i = 0; i < coord_count; ++i) | ||
| 467 | coords.push_back(GetRegister(coord_register + i)); | ||
| 468 | |||
| 469 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 470 | |||
| 471 | Node4 values; | ||
| 472 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 473 | auto coords_copy = coords; | ||
| 474 | MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; | ||
| 475 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 476 | } | ||
| 477 | |||
| 478 | return values; | ||
| 479 | } | ||
| 480 | |||
| 481 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 482 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 483 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 484 | |||
| 485 | // If arrays are enabled, the index is always stored in the gpr8 field | ||
| 486 | const u64 array_register = instr.gpr8.Value(); | ||
| 487 | // When this is an array texture, the coordinates are taken from gpr20 | ||
| 488 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 489 | |||
| 490 | const u64 last_coord_register = | ||
| 491 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 492 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 493 | : coord_register + 1; | ||
| 494 | |||
| 495 | std::vector<Node> coords; | ||
| 496 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 497 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 498 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 499 | } | ||
| 500 | |||
| 501 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 502 | // When LOD is used, it is always in gpr20 | ||
| 503 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||
| 504 | |||
| 505 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 506 | |||
| 507 | Node4 values; | ||
| 508 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 509 | auto coords_copy = coords; | ||
| 510 | MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; | ||
| 511 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 512 | } | ||
| 513 | return values; | ||
| 514 | } | ||
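The same compaction applies to TLDS; two hedged traces for a 2D, non-array fetch:

    // With LL (explicit lod):  x -> gpr8,  y -> gpr8+1,  lod -> gpr20
    // Without LL:              x -> gpr8,  y -> gpr20,   lod = Immediate(0)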
| 515 | |||
| 516 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 517 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 518 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 519 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 520 | |||
| 521 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 522 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 523 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 524 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 525 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 526 | } | ||
| 527 | // For 1D.DC, OpenGL uses a vec3, but the 2nd component is ignored later. | ||
| 528 | total_coord_count += | ||
| 529 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 530 | |||
| 531 | return {coord_count, total_coord_count}; | ||
| 532 | } | ||
| 533 | |||
| 534 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 52c7f2c4e..5bc3a3900 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -290,7 +290,9 @@ struct MetaTexture { | |||
| 290 | const Sampler& sampler; | 290 | const Sampler& sampler; |
| 291 | Node array{}; | 291 | Node array{}; |
| 292 | Node depth_compare{}; | 292 | Node depth_compare{}; |
| 293 | std::vector<Node> extras; | 293 | Node bias{}; |
| 294 | Node lod{}; | ||
| 295 | Node component{}; | ||
| 294 | u32 element{}; | 296 | u32 element{}; |
| 295 | }; | 297 | }; |
| 296 | 298 | ||
| @@ -614,6 +616,7 @@ private: | |||
| 614 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); | 616 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); |
| 615 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | 617 | u32 DecodeConversion(NodeBlock& bb, u32 pc); |
| 616 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | 618 | u32 DecodeMemory(NodeBlock& bb, u32 pc); |
| 619 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | ||
| 617 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); | 620 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); |
| 618 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); | 621 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); |
| 619 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); | 622 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); |
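With the shader_ir.h change above, the untyped extras vector is replaced by named MetaTexture fields, so each decoder states explicitly which operand it passes. A hedged sketch of how the new aggregate is filled (bias_node, lod_node and component_node are illustrative placeholders; field order as declared above):

    // MetaTexture{sampler, array, depth_compare, bias, lod, component, element}
    MetaTexture with_bias{sampler, array, dc, bias_node, {}, {}, element};    // e.g. TEX with LB
    MetaTexture with_lod {sampler, array, dc, {}, lod_node, {}, element};     // e.g. TEX with LL/LZ
    MetaTexture gather   {sampler, {}, {}, {}, {}, component_node, element};  // e.g. TLD4S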
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 044ba116a..a7ac26d71 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -89,8 +89,6 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { | |||
| 89 | 89 | ||
| 90 | PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { | 90 | PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { |
| 91 | switch (format) { | 91 | switch (format) { |
| 92 | // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the | ||
| 93 | // gamma. | ||
| 94 | case Tegra::RenderTargetFormat::RGBA8_SRGB: | 92 | case Tegra::RenderTargetFormat::RGBA8_SRGB: |
| 95 | return PixelFormat::RGBA8_SRGB; | 93 | return PixelFormat::RGBA8_SRGB; |
| 96 | case Tegra::RenderTargetFormat::RGBA8_UNORM: | 94 | case Tegra::RenderTargetFormat::RGBA8_UNORM: |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index bc50a4876..b508d64e9 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -23,28 +23,12 @@ | |||
| 23 | 23 | ||
| 24 | #include "video_core/textures/astc.h" | 24 | #include "video_core/textures/astc.h" |
| 25 | 25 | ||
| 26 | class BitStream { | 26 | class InputBitStream { |
| 27 | public: | 27 | public: |
| 28 | explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | 28 | explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0) |
| 29 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 29 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 30 | 30 | ||
| 31 | ~BitStream() = default; | 31 | ~InputBitStream() = default; |
| 32 | |||
| 33 | int GetBitsWritten() const { | ||
| 34 | return m_BitsWritten; | ||
| 35 | } | ||
| 36 | |||
| 37 | void WriteBitsR(unsigned int val, unsigned int nBits) { | ||
| 38 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 39 | WriteBit((val >> (nBits - i - 1)) & 1); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | void WriteBits(unsigned int val, unsigned int nBits) { | ||
| 44 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 45 | WriteBit((val >> i) & 1); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | 32 | ||
| 49 | int GetBitsRead() const { | 33 | int GetBitsRead() const { |
| 50 | return m_BitsRead; | 34 | return m_BitsRead; |
| @@ -71,6 +55,38 @@ public: | |||
| 71 | } | 55 | } |
| 72 | 56 | ||
| 73 | private: | 57 | private: |
| 58 | const int m_NumBits; | ||
| 59 | const unsigned char* m_CurByte; | ||
| 60 | int m_NextBit = 0; | ||
| 61 | int m_BitsRead = 0; | ||
| 62 | |||
| 63 | bool done = false; | ||
| 64 | }; | ||
| 65 | |||
| 66 | class OutputBitStream { | ||
| 67 | public: | ||
| 68 | explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | ||
| 69 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | ||
| 70 | |||
| 71 | ~OutputBitStream() = default; | ||
| 72 | |||
| 73 | int GetBitsWritten() const { | ||
| 74 | return m_BitsWritten; | ||
| 75 | } | ||
| 76 | |||
| 77 | void WriteBitsR(unsigned int val, unsigned int nBits) { | ||
| 78 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 79 | WriteBit((val >> (nBits - i - 1)) & 1); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | void WriteBits(unsigned int val, unsigned int nBits) { | ||
| 84 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 85 | WriteBit((val >> i) & 1); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | private: | ||
| 74 | void WriteBit(int b) { | 90 | void WriteBit(int b) { |
| 75 | 91 | ||
| 76 | if (done) | 92 | if (done) |
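The BitStream split above separates the read-only and write-only halves: the decoding paths keep working on an InputBitStream over a const pointer, while the endpoint re-encoding path gets an OutputBitStream that owns WriteBit/WriteBitsR/WriteBits. A hedged usage sketch (buffer and value names are illustrative):

    const unsigned char* block = compressed_block;  // 16-byte ASTC block, read-only
    InputBitStream in(block, 16 * 8);               // consumed by DecodeBlockInfo / DecodeIntegerSequence
    unsigned char endpoint_data[16] = {};
    OutputBitStream out(endpoint_data, 16 * 8);     // repacks the color endpoint bits
    out.WriteBits(value, nbits);                    // the write path no longer needs a mutable input block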
| @@ -238,8 +254,8 @@ public: | |||
| 238 | // Fills result with the values that are encoded in the given | 254 | // Fills result with the values that are encoded in the given |
| 239 | // bitstream. We must know beforehand what the maximum possible | 255 | // bitstream. We must know beforehand what the maximum possible |
| 240 | // value is, and how many values we're decoding. | 256 | // value is, and how many values we're decoding. |
| 241 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, | 257 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, |
| 242 | uint32_t maxRange, uint32_t nValues) { | 258 | InputBitStream& bits, uint32_t maxRange, uint32_t nValues) { |
| 243 | // Determine encoding parameters | 259 | // Determine encoding parameters |
| 244 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); | 260 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); |
| 245 | 261 | ||
| @@ -267,7 +283,7 @@ public: | |||
| 267 | } | 283 | } |
| 268 | 284 | ||
| 269 | private: | 285 | private: |
| 270 | static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, | 286 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 271 | uint32_t nBitsPerValue) { | 287 | uint32_t nBitsPerValue) { |
| 272 | // Implement the algorithm in section C.2.12 | 288 | // Implement the algorithm in section C.2.12 |
| 273 | uint32_t m[5]; | 289 | uint32_t m[5]; |
| @@ -327,7 +343,7 @@ private: | |||
| 327 | } | 343 | } |
| 328 | } | 344 | } |
| 329 | 345 | ||
| 330 | static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, | 346 | static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 331 | uint32_t nBitsPerValue) { | 347 | uint32_t nBitsPerValue) { |
| 332 | // Implement the algorithm in section C.2.12 | 348 | // Implement the algorithm in section C.2.12 |
| 333 | uint32_t m[3]; | 349 | uint32_t m[3]; |
| @@ -406,7 +422,7 @@ struct TexelWeightParams { | |||
| 406 | } | 422 | } |
| 407 | }; | 423 | }; |
| 408 | 424 | ||
| 409 | static TexelWeightParams DecodeBlockInfo(BitStream& strm) { | 425 | static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { |
| 410 | TexelWeightParams params; | 426 | TexelWeightParams params; |
| 411 | 427 | ||
| 412 | // Read the entire block mode all at once | 428 | // Read the entire block mode all at once |
| @@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) { | |||
| 605 | return params; | 621 | return params; |
| 606 | } | 622 | } |
| 607 | 623 | ||
| 608 | static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, | 624 | static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, |
| 609 | uint32_t blockHeight) { | 625 | uint32_t blockHeight) { |
| 610 | // Don't actually care about the void extent, just read the bits... | 626 | // Don't actually care about the void extent, just read the bits... |
| 611 | for (int i = 0; i < 4; ++i) { | 627 | for (int i = 0; i < 4; ++i) { |
| @@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 821 | 837 | ||
| 822 | // We now have enough to decode our integer sequence. | 838 | // We now have enough to decode our integer sequence. |
| 823 | std::vector<IntegerEncodedValue> decodedColorValues; | 839 | std::vector<IntegerEncodedValue> decodedColorValues; |
| 824 | BitStream colorStream(data); | 840 | InputBitStream colorStream(data); |
| 825 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | 841 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
| 826 | 842 | ||
| 827 | // Once we have the decoded values, we need to dequantize them to the 0-255 range | 843 | // Once we have the decoded values, we need to dequantize them to the 0-255 range |
| @@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1365 | #undef READ_INT_VALUES | 1381 | #undef READ_INT_VALUES |
| 1366 | } | 1382 | } |
| 1367 | 1383 | ||
| 1368 | static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | 1384 | static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, |
| 1369 | const uint32_t blockHeight, uint32_t* outBuf) { | 1385 | const uint32_t blockHeight, uint32_t* outBuf) { |
| 1370 | BitStream strm(inBuf); | 1386 | InputBitStream strm(inBuf); |
| 1371 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1387 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1372 | 1388 | ||
| 1373 | // Was there an error? | 1389 | // Was there an error? |
| @@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1421 | // Define color data. | 1437 | // Define color data. |
| 1422 | uint8_t colorEndpointData[16]; | 1438 | uint8_t colorEndpointData[16]; |
| 1423 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); | 1439 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); |
| 1424 | BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); | 1440 | OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); |
| 1425 | 1441 | ||
| 1426 | // Read extra config data... | 1442 | // Read extra config data... |
| 1427 | uint32_t baseCEM = 0; | 1443 | uint32_t baseCEM = 0; |
| @@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1549 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1565 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1550 | 1566 | ||
| 1551 | std::vector<IntegerEncodedValue> texelWeightValues; | 1567 | std::vector<IntegerEncodedValue> texelWeightValues; |
| 1552 | BitStream weightStream(texelWeightData); | 1568 | InputBitStream weightStream(texelWeightData); |
| 1553 | 1569 | ||
| 1554 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, | 1570 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, |
| 1555 | weightParams.m_MaxWeight, | 1571 | weightParams.m_MaxWeight, |
| @@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1597 | 1613 | ||
| 1598 | namespace Tegra::Texture::ASTC { | 1614 | namespace Tegra::Texture::ASTC { |
| 1599 | 1615 | ||
| 1600 | std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, | 1616 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, |
| 1601 | uint32_t depth, uint32_t block_width, uint32_t block_height) { | 1617 | uint32_t depth, uint32_t block_width, uint32_t block_height) { |
| 1602 | uint32_t blockIdx = 0; | 1618 | uint32_t blockIdx = 0; |
| 1603 | std::vector<uint8_t> outData(height * width * depth * 4); | 1619 | std::vector<uint8_t> outData(height * width * depth * 4); |
| @@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint | |||
| 1605 | for (uint32_t j = 0; j < height; j += block_height) { | 1621 | for (uint32_t j = 0; j < height; j += block_height) { |
| 1606 | for (uint32_t i = 0; i < width; i += block_width) { | 1622 | for (uint32_t i = 0; i < width; i += block_width) { |
| 1607 | 1623 | ||
| 1608 | uint8_t* blockPtr = data.data() + blockIdx * 16; | 1624 | const uint8_t* blockPtr = data + blockIdx * 16; |
| 1609 | 1625 | ||
| 1610 | // Blocks can be at most 12x12 | 1626 | // Blocks can be at most 12x12 |
| 1611 | uint32_t uncompData[144]; | 1627 | uint32_t uncompData[144]; |
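Note: the astc.cpp changes above split the old read/write BitStream into separate InputBitStream and OutputBitStream types, which is what lets DecompressBlock accept const uint8_t inBuf[16] without casting away const. A minimal illustrative sketch of the idea (this is not the class from the commit, only the const-correctness it enables; bits are read LSB-first within each byte, as ASTC does):

    #include <cstdint>

    // Read-only bit cursor over an immutable buffer: because it only ever reads,
    // it can wrap a const pointer, so callers may pass const block data.
    class InputBitStreamSketch {
    public:
        explicit InputBitStreamSketch(const uint8_t* ptr) : m_ptr{ptr} {}

        uint32_t ReadBit() {
            const uint32_t bit = (m_ptr[m_bit_pos / 8] >> (m_bit_pos % 8)) & 1u;
            ++m_bit_pos;
            return bit;
        }

        uint32_t ReadBits(uint32_t count) {
            uint32_t value = 0;
            for (uint32_t i = 0; i < count; ++i) {
                value |= ReadBit() << i;
            }
            return value;
        }

    private:
        const uint8_t* m_ptr;
        uint32_t m_bit_pos = 0;
    };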
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index d419dd025..991cdba72 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | namespace Tegra::Texture::ASTC { | 10 | namespace Tegra::Texture::ASTC { |
| 11 | 11 | ||
| 12 | std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, | 12 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, |
| 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); | 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); |
| 14 | 14 | ||
| 15 | } // namespace Tegra::Texture::ASTC | 15 | } // namespace Tegra::Texture::ASTC |
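With the header change above, Decompress no longer requires a mutable std::vector; any const byte range can be decoded. A hedged usage sketch (the wrapper function and variable names are illustrative, not part of this change):

    #include <cstdint>
    #include <vector>
    #include "video_core/textures/astc.h"

    // Decode a 2D ASTC texture (depth == 1) into tightly packed RGBA8.
    std::vector<uint8_t> DecodeAstc2D(const std::vector<uint8_t>& compressed, uint32_t width,
                                      uint32_t height, uint32_t block_width,
                                      uint32_t block_height) {
        // The const uint8_t* parameter guarantees the input is never modified.
        return Tegra::Texture::ASTC::Decompress(compressed.data(), width, height,
                                                /*depth=*/1, block_width, block_height);
    }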
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp new file mode 100644 index 000000000..5e439f036 --- /dev/null +++ b/src/video_core/textures/convert.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstring> | ||
| 7 | #include <tuple> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/textures/astc.h" | ||
| 14 | #include "video_core/textures/convert.h" | ||
| 15 | |||
| 16 | namespace Tegra::Texture { | ||
| 17 | |||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | template <bool reverse> | ||
| 21 | void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 22 | union S8Z24 { | ||
| 23 | BitField<0, 24, u32> z24; | ||
| 24 | BitField<24, 8, u32> s8; | ||
| 25 | }; | ||
| 26 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 27 | |||
| 28 | union Z24S8 { | ||
| 29 | BitField<0, 8, u32> s8; | ||
| 30 | BitField<8, 24, u32> z24; | ||
| 31 | }; | ||
| 32 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 33 | |||
| 34 | S8Z24 s8z24_pixel{}; | ||
| 35 | Z24S8 z24s8_pixel{}; | ||
| 36 | constexpr auto bpp{ | ||
| 37 | VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)}; | ||
| 38 | for (std::size_t y = 0; y < height; ++y) { | ||
| 39 | for (std::size_t x = 0; x < width; ++x) { | ||
| 40 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 41 | if constexpr (reverse) { | ||
| 42 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 43 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 44 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 45 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 46 | } else { | ||
| 47 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 48 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 49 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 50 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 57 | SwapS8Z24ToZ24S8<false>(data, width, height); | ||
| 58 | } | ||
| 59 | |||
| 60 | static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { | ||
| 61 | SwapS8Z24ToZ24S8<true>(data, width, height); | ||
| 62 | } | ||
| 63 | |||
| 64 | void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 65 | bool convert_astc, bool convert_s8z24) { | ||
| 66 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 67 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 68 | u32 block_width{}; | ||
| 69 | u32 block_height{}; | ||
| 70 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 71 | const std::vector<u8> rgba8_data = | ||
| 72 | Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); | ||
| 73 | std::copy(rgba8_data.begin(), rgba8_data.end(), data); | ||
| 74 | |||
| 75 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { | ||
| 76 | Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | |||
| 80 | void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 81 | bool convert_astc, bool convert_s8z24) { | ||
| 82 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 83 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 84 | static_cast<u32>(pixel_format)); | ||
| 85 | UNREACHABLE(); | ||
| 86 | |||
| 87 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { | ||
| 88 | Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Tegra::Texture \ No newline at end of file | ||
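The in-place depth/stencil conversion above only moves where the 8 stencil bits and 24 depth bits live within each 32-bit texel. The same per-pixel operation written with plain shifts, for reference (standalone sketch, not code from this change):

    #include <cstdint>

    // Guest S8Z24: depth in bits 0-23, stencil in bits 24-31.
    // Host Z24S8:  stencil in bits 0-7, depth in bits 8-31.
    uint32_t RepackS8Z24ToZ24S8(uint32_t s8z24) {
        const uint32_t depth = s8z24 & 0x00FFFFFFu; // low 24 bits
        const uint32_t stencil = s8z24 >> 24;       // high 8 bits
        return (depth << 8) | stencil;
    }

    uint32_t RepackZ24S8ToS8Z24(uint32_t z24s8) {
        const uint32_t stencil = z24s8 & 0xFFu;     // low 8 bits
        const uint32_t depth = z24s8 >> 8;          // high 24 bits
        return (stencil << 24) | depth;
    }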
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h new file mode 100644 index 000000000..07cd8b5da --- /dev/null +++ b/src/video_core/textures/convert.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | |||
| 10 | namespace Tegra::Texture { | ||
| 11 | |||
| 12 | void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 13 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 14 | |||
| 15 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 16 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 17 | |||
| 18 | } // namespace Tegra::Texture \ No newline at end of file | ||
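A hedged example of how a caller might use the new header: after a surface has been unswizzled into a linear buffer that is large enough for the converted result, the guest-to-host conversion runs in place before the data is handed to the host GPU. Everything except the two declared functions is a hypothetical stand-in:

    #include "video_core/surface.h"
    #include "video_core/textures/convert.h"

    void PrepareForUpload(u8* linear_data, VideoCore::Surface::PixelFormat format, u32 width,
                          u32 height, u32 depth) {
        // Decompresses ASTC to RGBA8 and repacks S8Z24 when the host cannot consume
        // the guest format directly; other formats pass through untouched.
        Tegra::Texture::ConvertFromGuestToHost(linear_data, format, width, height, depth,
                                               /*convert_astc=*/true, /*convert_s8z24=*/true);
        // ... upload linear_data with the host graphics API ...
    }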
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 5db75de22..cad7340f5 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const | |||
| 103 | const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; | 103 | const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; |
| 104 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | 104 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; |
| 105 | const u32 pixel_index{out_x + pixel_base}; | 105 | const u32 pixel_index{out_x + pixel_base}; |
| 106 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | 106 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; |
| 107 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | 107 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; |
| 108 | std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); | 108 | std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); |
| 109 | } | 109 | } |
| 110 | pixel_base += stride_x; | 110 | pixel_base += stride_x; |
| @@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 154 | for (u32 xb = 0; xb < blocks_on_x; xb++) { | 154 | for (u32 xb = 0; xb < blocks_on_x; xb++) { |
| 155 | const u32 x_start = xb * block_x_elements; | 155 | const u32 x_start = xb * block_x_elements; |
| 156 | const u32 x_end = std::min(width, x_start + block_x_elements); | 156 | const u32 x_end = std::min(width, x_start + block_x_elements); |
| 157 | if (fast) { | 157 | if constexpr (fast) { |
| 158 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | 158 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, |
| 159 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | 159 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, |
| 160 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | 160 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); |
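The data_ptrs[unswizzle ? 1 : 0] change above is behaviour-preserving: indexing with a bool already yields 0 or 1, and the ternary only makes the direction selection explicit. The pattern picks which buffer is the memcpy source and which is the destination; a compact illustration with illustrative names (the switch to if constexpr additionally resolves the fast/slow path at compile time, assuming fast is a template parameter, which this hunk does not show):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Copy one aligned run between a tiled (swizzled) buffer and a linear buffer.
    // When unswizzle is true the tiled buffer is the source; otherwise it is the
    // destination. memcpy always copies from data_ptrs[1] into data_ptrs[0].
    inline void CopyRun(std::uint8_t* swizzled, std::uint8_t* linear, bool unswizzle,
                        std::size_t count) {
        std::uint8_t* data_ptrs[2];
        data_ptrs[unswizzle ? 1 : 0] = swizzled;
        data_ptrs[unswizzle ? 0 : 1] = linear;
        std::memcpy(data_ptrs[0], data_ptrs[1], count);
    }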
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 85b7e9f7b..65df86890 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() { | |||
| 16 | return 512; | 16 | return 512; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | /** | 19 | /// Unswizzles a swizzled texture without changing its format. |
| 20 | * Unswizzles a swizzled texture without changing its format. | ||
| 21 | */ | ||
| 22 | void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, | 20 | void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, |
| 23 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 21 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 24 | u32 block_height = TICEntry::DefaultBlockHeight, | 22 | u32 block_height = TICEntry::DefaultBlockHeight, |
| 25 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); | 23 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); |
| 26 | /** | 24 | |
| 27 | * Unswizzles a swizzled texture without changing its format. | 25 | /// Unswizzles a swizzled texture without changing its format. |
| 28 | */ | ||
| 29 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, | 26 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, |
| 30 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 27 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 31 | u32 block_height = TICEntry::DefaultBlockHeight, | 28 | u32 block_height = TICEntry::DefaultBlockHeight, |
| @@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | |||
| 37 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | 34 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 38 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); | 35 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); |
| 39 | 36 | ||
| 40 | /** | 37 | /// Decodes an unswizzled texture into a A8R8G8B8 texture. |
| 41 | * Decodes an unswizzled texture into a A8R8G8B8 texture. | ||
| 42 | */ | ||
| 43 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | 38 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, |
| 44 | u32 height); | 39 | u32 height); |
| 45 | 40 | ||
| 46 | /** | 41 | /// This function calculates the correct size of a texture depending if it's tiled or not. |
| 47 | * This function calculates the correct size of a texture depending if it's tiled or not. | ||
| 48 | */ | ||
| 49 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 42 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 50 | u32 block_height, u32 block_depth); | 43 | u32 block_height, u32 block_depth); |
| 51 | 44 | ||
| @@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 53 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 54 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 47 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |
| 55 | u32 block_height); | 48 | u32 block_height); |
| 49 | |||
| 56 | /// Copies a tiled subrectangle into a linear surface. | 50 | /// Copies a tiled subrectangle into a linear surface. |
| 57 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 51 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 58 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 52 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |
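A hedged usage sketch for the declarations above: compute the guest footprint of a tiled texture with CalculateSize, and unswizzle it into a pitch-linear buffer with UnswizzleTexture. Only the two function signatures come from the header; the enclosing namespace, helper names, and argument values are assumptions for illustration:

    #include <cstddef>
    #include <vector>
    #include "video_core/textures/decoders.h"

    // Bytes the block-linear (tiled) copy occupies in guest memory.
    std::size_t GuestFootprint(u32 bytes_per_pixel, u32 width, u32 height, u32 block_height) {
        return Tegra::Texture::CalculateSize(/*tiled=*/true, bytes_per_pixel, width, height,
                                             /*depth=*/1, block_height, /*block_depth=*/1);
    }

    // Unswizzle a 2D texture at 'address' into a freshly allocated pitch-linear buffer.
    std::vector<u8> ReadLinear(VAddr address, u32 tile_size_x, u32 tile_size_y,
                               u32 bytes_per_pixel, u32 width, u32 height, u32 block_height) {
        std::vector<u8> linear(std::size_t{width} * height * bytes_per_pixel);
        Tegra::Texture::UnswizzleTexture(linear.data(), address, tile_size_x, tile_size_y,
                                         bytes_per_pixel, width, height, /*depth=*/1,
                                         block_height);
        return linear;
    }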
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 0fc5530f2..b8675f702 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include "common/assert.h" | 8 | #include "common/assert.h" |
| 8 | #include "common/bit_field.h" | 9 | #include "common/bit_field.h" |
| 9 | #include "common/common_funcs.h" | 10 | #include "common/common_funcs.h" |
| @@ -293,7 +294,7 @@ struct TSCEntry { | |||
| 293 | union { | 294 | union { |
| 294 | BitField<0, 2, TextureFilter> mag_filter; | 295 | BitField<0, 2, TextureFilter> mag_filter; |
| 295 | BitField<4, 2, TextureFilter> min_filter; | 296 | BitField<4, 2, TextureFilter> min_filter; |
| 296 | BitField<6, 2, TextureMipmapFilter> mip_filter; | 297 | BitField<6, 2, TextureMipmapFilter> mipmap_filter; |
| 297 | BitField<9, 1, u32> cubemap_interface_filtering; | 298 | BitField<9, 1, u32> cubemap_interface_filtering; |
| 298 | BitField<12, 13, u32> mip_lod_bias; | 299 | BitField<12, 13, u32> mip_lod_bias; |
| 299 | }; | 300 | }; |
| @@ -306,10 +307,33 @@ struct TSCEntry { | |||
| 306 | BitField<12, 8, u32> srgb_border_color_g; | 307 | BitField<12, 8, u32> srgb_border_color_g; |
| 307 | BitField<20, 8, u32> srgb_border_color_b; | 308 | BitField<20, 8, u32> srgb_border_color_b; |
| 308 | }; | 309 | }; |
| 309 | float border_color_r; | 310 | std::array<f32, 4> border_color; |
| 310 | float border_color_g; | 311 | |
| 311 | float border_color_b; | 312 | float GetMaxAnisotropy() const { |
| 312 | float border_color_a; | 313 | return static_cast<float>(1U << max_anisotropy); |
| 314 | } | ||
| 315 | |||
| 316 | float GetMinLod() const { | ||
| 317 | return static_cast<float>(min_lod_clamp) / 256.0f; | ||
| 318 | } | ||
| 319 | |||
| 320 | float GetMaxLod() const { | ||
| 321 | return static_cast<float>(max_lod_clamp) / 256.0f; | ||
| 322 | } | ||
| 323 | |||
| 324 | float GetLodBias() const { | ||
| 325 | // Sign extend the 13-bit value. | ||
| 326 | constexpr u32 mask = 1U << (13 - 1); | ||
| 327 | return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f; | ||
| 328 | } | ||
| 329 | |||
| 330 | std::array<float, 4> GetBorderColor() const { | ||
| 331 | if (srgb_conversion) { | ||
| 332 | return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f, | ||
| 333 | srgb_border_color_b / 255.0f, border_color[3]}; | ||
| 334 | } | ||
| 335 | return border_color; | ||
| 336 | } | ||
| 313 | }; | 337 | }; |
| 314 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); | 338 | static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); |
| 315 | 339 | ||
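The new TSCEntry helpers above turn raw sampler descriptor fields into floats: max_anisotropy is a power-of-two exponent, the LOD clamps are unsigned 8.8 fixed point, and the LOD bias is a signed 13-bit 8.8 fixed-point value sign-extended with the (x ^ mask) - mask idiom. A standalone worked example of that sign extension (illustrative, same math as GetLodBias):

    #include <cstdint>

    // Sign-extend a 13-bit two's-complement field held in the low bits of a u32,
    // then convert from 8.8 fixed point to float.
    float DecodeLodBias(uint32_t raw13) {
        constexpr uint32_t mask = 1u << (13 - 1); // sign bit of the 13-bit field
        return static_cast<int32_t>((raw13 ^ mask) - mask) / 256.0f;
    }

    // DecodeLodBias(0x0080) == 0.5f   (positive 8.8 value: 128 / 256)
    // DecodeLodBias(0x1F00) == -1.0f  (sign bit set, extends to -256 / 256)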
diff --git a/src/web_service/verify_login.h b/src/web_service/verify_login.h index 39db32dbb..821b345d7 100644 --- a/src/web_service/verify_login.h +++ b/src/web_service/verify_login.h | |||
| @@ -4,8 +4,6 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <functional> | ||
| 8 | #include <future> | ||
| 9 | #include <string> | 7 | #include <string> |
| 10 | 8 | ||
| 11 | namespace WebService { | 9 | namespace WebService { |
diff --git a/src/web_service/web_backend.cpp b/src/web_service/web_backend.cpp index b7737b615..40da1a4e2 100644 --- a/src/web_service/web_backend.cpp +++ b/src/web_service/web_backend.cpp | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "common/logging/log.h" | 11 | #include "common/logging/log.h" |
| 12 | #include "common/web_result.h" | 12 | #include "common/web_result.h" |
| 13 | #include "core/settings.h" | ||
| 14 | #include "web_service/web_backend.h" | 13 | #include "web_service/web_backend.h" |
| 15 | 14 | ||
| 16 | namespace WebService { | 15 | namespace WebService { |
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp index 6a9138d53..979b9ec14 100644 --- a/src/yuzu/applets/web_browser.cpp +++ b/src/yuzu/applets/web_browser.cpp | |||
| @@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"( | |||
| 56 | window.nx.endApplet = function() { | 56 | window.nx.endApplet = function() { |
| 57 | applet_done = true; | 57 | applet_done = true; |
| 58 | }; | 58 | }; |
| 59 | |||
| 60 | window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } }; | ||
| 59 | )"; | 61 | )"; |
| 60 | 62 | ||
| 61 | QString GetNXShimInjectionScript() { | 63 | QString GetNXShimInjectionScript() { |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index b2a087aa5..d2c97b1f8 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -20,10 +20,7 @@ | |||
| 20 | EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} | 20 | EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} |
| 21 | 21 | ||
| 22 | void EmuThread::run() { | 22 | void EmuThread::run() { |
| 23 | if (!Settings::values.use_multi_core) { | 23 | render_window->MakeCurrent(); |
| 24 | // Single core mode must acquire OpenGL context for entire emulation session | ||
| 25 | render_window->MakeCurrent(); | ||
| 26 | } | ||
| 27 | 24 | ||
| 28 | MicroProfileOnThreadCreate("EmuThread"); | 25 | MicroProfileOnThreadCreate("EmuThread"); |
| 29 | 26 | ||
| @@ -38,6 +35,11 @@ void EmuThread::run() { | |||
| 38 | 35 | ||
| 39 | emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); | 36 | emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); |
| 40 | 37 | ||
| 38 | if (Settings::values.use_asynchronous_gpu_emulation) { | ||
| 39 | // Release OpenGL context for the GPU thread | ||
| 40 | render_window->DoneCurrent(); | ||
| 41 | } | ||
| 42 | |||
| 41 | // holds whether the cpu was running during the last iteration, | 43 | // holds whether the cpu was running during the last iteration, |
| 42 | // so that the DebugModeLeft signal can be emitted before the | 44 | // so that the DebugModeLeft signal can be emitted before the |
| 43 | // next execution step | 45 | // next execution step |
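The bootmanager change above makes the emulation thread always acquire the OpenGL context at startup (loading, including the disk shader cache, needs it) and release it once loading finishes when asynchronous GPU emulation is enabled, since a context can be current on only one thread at a time and the dedicated GPU thread will claim it. A compressed sketch of the hand-off, with illustrative names:

    // Runs on the CPU/emulation thread.
    void EmuThreadBodySketch(GRenderWindow* window, bool async_gpu) {
        window->MakeCurrent();       // context needed while loading (shader cache, etc.)
        // ... loading work that touches OpenGL ...
        if (async_gpu) {
            window->DoneCurrent();   // hand the context over; the GPU thread calls
                                     // MakeCurrent() on its side before rendering
        }
        // CPU emulation continues; GL work is submitted to the GPU thread instead.
    }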
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp index c09a06520..c8b0a5ec0 100644 --- a/src/yuzu/compatdb.cpp +++ b/src/yuzu/compatdb.cpp | |||
| @@ -53,8 +53,8 @@ void CompatDB::Submit() { | |||
| 53 | case CompatDBPage::Final: | 53 | case CompatDBPage::Final: |
| 54 | back(); | 54 | back(); |
| 55 | LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); | 55 | LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); |
| 56 | Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", | 56 | Core::System::GetInstance().TelemetrySession().AddField( |
| 57 | compatibility->checkedId()); | 57 | Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId()); |
| 58 | 58 | ||
| 59 | button(NextButton)->setEnabled(false); | 59 | button(NextButton)->setEnabled(false); |
| 60 | button(NextButton)->setText(tr("Submitting")); | 60 | button(NextButton)->setText(tr("Submitting")); |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9546dadf..74dc6bb28 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -374,6 +374,8 @@ void Config::ReadValues() { | |||
| 374 | qt_config->value("use_disk_shader_cache", false).toBool(); | 374 | qt_config->value("use_disk_shader_cache", false).toBool(); |
| 375 | Settings::values.use_accurate_gpu_emulation = | 375 | Settings::values.use_accurate_gpu_emulation = |
| 376 | qt_config->value("use_accurate_gpu_emulation", false).toBool(); | 376 | qt_config->value("use_accurate_gpu_emulation", false).toBool(); |
| 377 | Settings::values.use_asynchronous_gpu_emulation = | ||
| 378 | qt_config->value("use_asynchronous_gpu_emulation", false).toBool(); | ||
| 377 | 379 | ||
| 378 | Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); | 380 | Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); |
| 379 | Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); | 381 | Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); |
| @@ -633,6 +635,8 @@ void Config::SaveValues() { | |||
| 633 | qt_config->setValue("frame_limit", Settings::values.frame_limit); | 635 | qt_config->setValue("frame_limit", Settings::values.frame_limit); |
| 634 | qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); | 636 | qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); |
| 635 | qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); | 637 | qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); |
| 638 | qt_config->setValue("use_asynchronous_gpu_emulation", | ||
| 639 | Settings::values.use_asynchronous_gpu_emulation); | ||
| 636 | 640 | ||
| 637 | // Cast to double because Qt's written float values are not human-readable | 641 | // Cast to double because Qt's written float values are not human-readable |
| 638 | qt_config->setValue("bg_red", (double)Settings::values.bg_red); | 642 | qt_config->setValue("bg_red", (double)Settings::values.bg_red); |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 0f5dd534b..dd1d67488 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() { | |||
| 75 | ui->frame_limit->setValue(Settings::values.frame_limit); | 75 | ui->frame_limit->setValue(Settings::values.frame_limit); |
| 76 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); | 76 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); |
| 77 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | 77 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); |
| 78 | ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn()); | ||
| 79 | ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); | ||
| 78 | UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, | 80 | UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, |
| 79 | Settings::values.bg_blue)); | 81 | Settings::values.bg_blue)); |
| 80 | } | 82 | } |
| @@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() { | |||
| 86 | Settings::values.frame_limit = ui->frame_limit->value(); | 88 | Settings::values.frame_limit = ui->frame_limit->value(); |
| 87 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); | 89 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); |
| 88 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | 90 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); |
| 91 | Settings::values.use_asynchronous_gpu_emulation = | ||
| 92 | ui->use_asynchronous_gpu_emulation->isChecked(); | ||
| 89 | Settings::values.bg_red = static_cast<float>(bg_color.redF()); | 93 | Settings::values.bg_red = static_cast<float>(bg_color.redF()); |
| 90 | Settings::values.bg_green = static_cast<float>(bg_color.greenF()); | 94 | Settings::values.bg_green = static_cast<float>(bg_color.greenF()); |
| 91 | Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); | 95 | Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 824f5810a..c6767e0ca 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -64,6 +64,13 @@ | |||
| 64 | </widget> | 64 | </widget> |
| 65 | </item> | 65 | </item> |
| 66 | <item> | 66 | <item> |
| 67 | <widget class="QCheckBox" name="use_asynchronous_gpu_emulation"> | ||
| 68 | <property name="text"> | ||
| 69 | <string>Use asynchronous GPU emulation</string> | ||
| 70 | </property> | ||
| 71 | </widget> | ||
| 72 | </item> | ||
| 73 | <item> | ||
| 67 | <layout class="QHBoxLayout" name="horizontalLayout"> | 74 | <layout class="QHBoxLayout" name="horizontalLayout"> |
| 68 | <item> | 75 | <item> |
| 69 | <widget class="QLabel" name="label"> | 76 | <widget class="QLabel" name="label"> |
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index f50225d5f..06ad74ffe 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp | |||
| @@ -81,9 +81,8 @@ QString WaitTreeText::GetText() const { | |||
| 81 | return text; | 81 | return text; |
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address) : mutex_address(mutex_address) { | 84 | WaitTreeMutexInfo::WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table) |
| 85 | const auto& handle_table = Core::CurrentProcess()->GetHandleTable(); | 85 | : mutex_address(mutex_address) { |
| 86 | |||
| 87 | mutex_value = Memory::Read32(mutex_address); | 86 | mutex_value = Memory::Read32(mutex_address); |
| 88 | owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); | 87 | owner_handle = static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask); |
| 89 | owner = handle_table.Get<Kernel::Thread>(owner_handle); | 88 | owner = handle_table.Get<Kernel::Thread>(owner_handle); |
| @@ -316,7 +315,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const { | |||
| 316 | 315 | ||
| 317 | const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); | 316 | const VAddr mutex_wait_address = thread.GetMutexWaitAddress(); |
| 318 | if (mutex_wait_address != 0) { | 317 | if (mutex_wait_address != 0) { |
| 319 | list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address)); | 318 | const auto& handle_table = thread.GetOwnerProcess()->GetHandleTable(); |
| 319 | list.push_back(std::make_unique<WaitTreeMutexInfo>(mutex_wait_address, handle_table)); | ||
| 320 | } else { | 320 | } else { |
| 321 | list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); | 321 | list.push_back(std::make_unique<WaitTreeText>(tr("not waiting for mutex"))); |
| 322 | } | 322 | } |
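Passing the handle table explicitly matters because kernel handles are per-process: the wait tree may be describing a thread from any process, so resolving the mutex owner through the "current" process could consult the wrong table. A condensed view of the lookup the constructor performs, restated from the hunk above with the same names:

    const u32 mutex_value = Memory::Read32(mutex_address);
    const auto owner_handle =
        static_cast<Kernel::Handle>(mutex_value & Kernel::Mutex::MutexOwnerMask);
    // Resolve the handle in the table of the process that owns the waiting thread.
    const auto owner = handle_table.Get<Kernel::Thread>(owner_handle);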
diff --git a/src/yuzu/debugger/wait_tree.h b/src/yuzu/debugger/wait_tree.h index 365c3dbfe..62886609d 100644 --- a/src/yuzu/debugger/wait_tree.h +++ b/src/yuzu/debugger/wait_tree.h | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | class EmuThread; | 17 | class EmuThread; |
| 18 | 18 | ||
| 19 | namespace Kernel { | 19 | namespace Kernel { |
| 20 | class HandleTable; | ||
| 20 | class ReadableEvent; | 21 | class ReadableEvent; |
| 21 | class WaitObject; | 22 | class WaitObject; |
| 22 | class Thread; | 23 | class Thread; |
| @@ -72,7 +73,7 @@ public: | |||
| 72 | class WaitTreeMutexInfo : public WaitTreeExpandableItem { | 73 | class WaitTreeMutexInfo : public WaitTreeExpandableItem { |
| 73 | Q_OBJECT | 74 | Q_OBJECT |
| 74 | public: | 75 | public: |
| 75 | explicit WaitTreeMutexInfo(VAddr mutex_address); | 76 | explicit WaitTreeMutexInfo(VAddr mutex_address, const Kernel::HandleTable& handle_table); |
| 76 | ~WaitTreeMutexInfo() override; | 77 | ~WaitTreeMutexInfo() override; |
| 77 | 78 | ||
| 78 | QString GetText() const override; | 79 | QString GetText() const override; |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 1d460c189..41ba3c4c6 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "applets/profile_select.h" | 11 | #include "applets/profile_select.h" |
| 12 | #include "applets/software_keyboard.h" | 12 | #include "applets/software_keyboard.h" |
| 13 | #include "applets/web_browser.h" | 13 | #include "applets/web_browser.h" |
| 14 | #include "configuration/configure_input.h" | ||
| 14 | #include "configuration/configure_per_general.h" | 15 | #include "configuration/configure_per_general.h" |
| 15 | #include "core/file_sys/vfs.h" | 16 | #include "core/file_sys/vfs.h" |
| 16 | #include "core/file_sys/vfs_real.h" | 17 | #include "core/file_sys/vfs_real.h" |
| @@ -339,6 +340,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view | |||
| 339 | .arg(QString::fromStdString(std::to_string(key_code)))); | 340 | .arg(QString::fromStdString(std::to_string(key_code)))); |
| 340 | }; | 341 | }; |
| 341 | 342 | ||
| 343 | QMessageBox::information( | ||
| 344 | this, tr("Exit"), | ||
| 345 | tr("To exit the web application, use the game provided controls to select exit, select the " | ||
| 346 | "'Exit Web Applet' option in the menu bar, or press the 'Enter' key.")); | ||
| 347 | |||
| 342 | bool running_exit_check = false; | 348 | bool running_exit_check = false; |
| 343 | while (!finished) { | 349 | while (!finished) { |
| 344 | QApplication::processEvents(); | 350 | QApplication::processEvents(); |
| @@ -522,6 +528,7 @@ void GMainWindow::InitializeHotkeys() { | |||
| 522 | Qt::ApplicationShortcut); | 528 | Qt::ApplicationShortcut); |
| 523 | hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", | 529 | hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", |
| 524 | QKeySequence(QKeySequence::Print)); | 530 | QKeySequence(QKeySequence::Print)); |
| 531 | hotkey_registry.RegisterHotkey("Main Window", "Change Docked Mode", QKeySequence(Qt::Key_F10)); | ||
| 525 | 532 | ||
| 526 | hotkey_registry.LoadHotkeys(); | 533 | hotkey_registry.LoadHotkeys(); |
| 527 | 534 | ||
| @@ -561,7 +568,10 @@ void GMainWindow::InitializeHotkeys() { | |||
| 561 | Settings::values.use_frame_limit = !Settings::values.use_frame_limit; | 568 | Settings::values.use_frame_limit = !Settings::values.use_frame_limit; |
| 562 | UpdateStatusBar(); | 569 | UpdateStatusBar(); |
| 563 | }); | 570 | }); |
| 564 | constexpr u16 SPEED_LIMIT_STEP = 5; | 571 | // TODO: Remove this comment/static whenever the next major release of |
| 572 | // MSVC occurs and we make it a requirement (see: | ||
| 573 | // https://developercommunity.visualstudio.com/content/problem/93922/constexprs-are-trying-to-be-captured-in-lambda-fun.html) | ||
| 574 | static constexpr u16 SPEED_LIMIT_STEP = 5; | ||
| 565 | connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), | 575 | connect(hotkey_registry.GetHotkey("Main Window", "Increase Speed Limit", this), |
| 566 | &QShortcut::activated, this, [&] { | 576 | &QShortcut::activated, this, [&] { |
| 567 | if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { | 577 | if (Settings::values.frame_limit < 9999 - SPEED_LIMIT_STEP) { |
| @@ -588,6 +598,12 @@ void GMainWindow::InitializeHotkeys() { | |||
| 588 | OnCaptureScreenshot(); | 598 | OnCaptureScreenshot(); |
| 589 | } | 599 | } |
| 590 | }); | 600 | }); |
| 601 | connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this), | ||
| 602 | &QShortcut::activated, this, [&] { | ||
| 603 | Settings::values.use_docked_mode = !Settings::values.use_docked_mode; | ||
| 604 | OnDockedModeChanged(!Settings::values.use_docked_mode, | ||
| 605 | Settings::values.use_docked_mode); | ||
| 606 | }); | ||
| 591 | } | 607 | } |
| 592 | 608 | ||
| 593 | void GMainWindow::SetDefaultUIGeometry() { | 609 | void GMainWindow::SetDefaultUIGeometry() { |
| @@ -846,7 +862,7 @@ bool GMainWindow::LoadROM(const QString& filename) { | |||
| 846 | } | 862 | } |
| 847 | game_path = filename; | 863 | game_path = filename; |
| 848 | 864 | ||
| 849 | Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); | 865 | system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); |
| 850 | return true; | 866 | return true; |
| 851 | } | 867 | } |
| 852 | 868 | ||
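The static constexpr change above works around an MSVC issue where a plain constexpr local used inside a lambda is incorrectly treated as needing capture. Giving the variable static storage duration sidesteps the capture entirely without changing behaviour. A minimal illustration (not project code):

    // With affected MSVC versions, a non-static 'constexpr' local read inside the
    // lambda body can fail to compile even though only its value is used.
    // 'static constexpr' avoids any capture because the variable has static
    // storage duration.
    void Example() {
        static constexpr int STEP = 5;
        const auto bump = [&](int value) { return value + STEP; };
        (void)bump(10); // 15
    }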
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index ff05b3179..32e78049c 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -346,7 +346,7 @@ void Config::ReadValues() { | |||
| 346 | 346 | ||
| 347 | // Renderer | 347 | // Renderer |
| 348 | Settings::values.resolution_factor = | 348 | Settings::values.resolution_factor = |
| 349 | (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); | 349 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); |
| 350 | Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); | 350 | Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); |
| 351 | Settings::values.frame_limit = | 351 | Settings::values.frame_limit = |
| 352 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); | 352 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); |
| @@ -354,17 +354,20 @@ void Config::ReadValues() { | |||
| 354 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); | 354 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); |
| 355 | Settings::values.use_accurate_gpu_emulation = | 355 | Settings::values.use_accurate_gpu_emulation = |
| 356 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | 356 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); |
| 357 | Settings::values.use_asynchronous_gpu_emulation = | ||
| 358 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | ||
| 357 | 359 | ||
| 358 | Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); | 360 | Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); |
| 359 | Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); | 361 | Settings::values.bg_green = |
| 360 | Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); | 362 | static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0)); |
| 363 | Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0)); | ||
| 361 | 364 | ||
| 362 | // Audio | 365 | // Audio |
| 363 | Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); | 366 | Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); |
| 364 | Settings::values.enable_audio_stretching = | 367 | Settings::values.enable_audio_stretching = |
| 365 | sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); | 368 | sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); |
| 366 | Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); | 369 | Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); |
| 367 | Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); | 370 | Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1)); |
| 368 | 371 | ||
| 369 | Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); | 372 | Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); |
| 370 | 373 | ||
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index a81986f8e..6538af098 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -118,6 +118,10 @@ use_disk_shader_cache = | |||
| 118 | # 0 (default): Off (fast), 1 : On (slow) | 118 | # 0 (default): Off (fast), 1 : On (slow) |
| 119 | use_accurate_gpu_emulation = | 119 | use_accurate_gpu_emulation = |
| 120 | 120 | ||
| 121 | # Whether to use asynchronous GPU emulation | ||
| 122 | # 0 : Off (slow), 1 (default): On (fast) | ||
| 123 | use_asynchronous_gpu_emulation = | ||
| 124 | |||
| 121 | # The clear color for the renderer. What shows up on the sides of the bottom screen. | 125 | # The clear color for the renderer. What shows up on the sides of the bottom screen. |
| 122 | # Must be in range of 0.0-1.0. Defaults to 1.0 for all. | 126 | # Must be in range of 0.0-1.0. Defaults to 1.0 for all. |
| 123 | bg_red = | 127 | bg_red = |
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index c34b5467f..c6c66a787 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp | |||
| @@ -216,7 +216,7 @@ int main(int argc, char** argv) { | |||
| 216 | } | 216 | } |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); | 219 | system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); |
| 220 | 220 | ||
| 221 | system.Renderer().Rasterizer().LoadDiskResources(); | 221 | system.Renderer().Rasterizer().LoadDiskResources(); |
| 222 | 222 | ||