diff options
Diffstat (limited to 'src')
76 files changed, 2136 insertions, 941 deletions
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp index 1da0b9f2a..7047ed9cf 100644 --- a/src/audio_core/cubeb_sink.cpp +++ b/src/audio_core/cubeb_sink.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include "common/ring_buffer.h" | 12 | #include "common/ring_buffer.h" |
| 13 | #include "core/settings.h" | 13 | #include "core/settings.h" |
| 14 | 14 | ||
| 15 | #ifdef _MSC_VER | 15 | #ifdef _WIN32 |
| 16 | #include <objbase.h> | 16 | #include <objbase.h> |
| 17 | #endif | 17 | #endif |
| 18 | 18 | ||
| @@ -113,7 +113,7 @@ private: | |||
| 113 | 113 | ||
| 114 | CubebSink::CubebSink(std::string_view target_device_name) { | 114 | CubebSink::CubebSink(std::string_view target_device_name) { |
| 115 | // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows | 115 | // Cubeb requires COM to be initialized on the thread calling cubeb_init on Windows |
| 116 | #ifdef _MSC_VER | 116 | #ifdef _WIN32 |
| 117 | com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED); | 117 | com_init_result = CoInitializeEx(nullptr, COINIT_MULTITHREADED); |
| 118 | #endif | 118 | #endif |
| 119 | 119 | ||
| @@ -152,7 +152,7 @@ CubebSink::~CubebSink() { | |||
| 152 | 152 | ||
| 153 | cubeb_destroy(ctx); | 153 | cubeb_destroy(ctx); |
| 154 | 154 | ||
| 155 | #ifdef _MSC_VER | 155 | #ifdef _WIN32 |
| 156 | if (SUCCEEDED(com_init_result)) { | 156 | if (SUCCEEDED(com_init_result)) { |
| 157 | CoUninitialize(); | 157 | CoUninitialize(); |
| 158 | } | 158 | } |
diff --git a/src/audio_core/cubeb_sink.h b/src/audio_core/cubeb_sink.h index 511df7bb1..7ce850f47 100644 --- a/src/audio_core/cubeb_sink.h +++ b/src/audio_core/cubeb_sink.h | |||
| @@ -26,7 +26,7 @@ private: | |||
| 26 | cubeb_devid output_device{}; | 26 | cubeb_devid output_device{}; |
| 27 | std::vector<SinkStreamPtr> sink_streams; | 27 | std::vector<SinkStreamPtr> sink_streams; |
| 28 | 28 | ||
| 29 | #ifdef _MSC_VER | 29 | #ifdef _WIN32 |
| 30 | u32 com_init_result = 0; | 30 | u32 com_init_result = 0; |
| 31 | #endif | 31 | #endif |
| 32 | }; | 32 | }; |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index bdd885273..3d30f0e3e 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp | |||
| 47 | "${VIDEO_CORE}/shader/decode/integer_set.cpp" | 47 | "${VIDEO_CORE}/shader/decode/integer_set.cpp" |
| 48 | "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" | 48 | "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" |
| 49 | "${VIDEO_CORE}/shader/decode/memory.cpp" | 49 | "${VIDEO_CORE}/shader/decode/memory.cpp" |
| 50 | "${VIDEO_CORE}/shader/decode/texture.cpp" | ||
| 50 | "${VIDEO_CORE}/shader/decode/other.cpp" | 51 | "${VIDEO_CORE}/shader/decode/other.cpp" |
| 51 | "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" | 52 | "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" |
| 52 | "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" | 53 | "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" |
diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 21e07925d..7433c39ba 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h | |||
| @@ -111,12 +111,6 @@ | |||
| 111 | template <std::size_t Position, std::size_t Bits, typename T> | 111 | template <std::size_t Position, std::size_t Bits, typename T> |
| 112 | struct BitField { | 112 | struct BitField { |
| 113 | private: | 113 | private: |
| 114 | // We hide the copy assigment operator here, because the default copy | ||
| 115 | // assignment would copy the full storage value, rather than just the bits | ||
| 116 | // relevant to this particular bit field. | ||
| 117 | // We don't delete it because we want BitField to be trivially copyable. | ||
| 118 | constexpr BitField& operator=(const BitField&) = default; | ||
| 119 | |||
| 120 | // UnderlyingType is T for non-enum types and the underlying type of T if | 114 | // UnderlyingType is T for non-enum types and the underlying type of T if |
| 121 | // T is an enumeration. Note that T is wrapped within an enable_if in the | 115 | // T is an enumeration. Note that T is wrapped within an enable_if in the |
| 122 | // former case to workaround compile errors which arise when using | 116 | // former case to workaround compile errors which arise when using |
| @@ -163,9 +157,13 @@ public: | |||
| 163 | BitField(T val) = delete; | 157 | BitField(T val) = delete; |
| 164 | BitField& operator=(T val) = delete; | 158 | BitField& operator=(T val) = delete; |
| 165 | 159 | ||
| 166 | // Force default constructor to be created | 160 | constexpr BitField() noexcept = default; |
| 167 | // so that we can use this within unions | 161 | |
| 168 | constexpr BitField() = default; | 162 | constexpr BitField(const BitField&) noexcept = default; |
| 163 | constexpr BitField& operator=(const BitField&) noexcept = default; | ||
| 164 | |||
| 165 | constexpr BitField(BitField&&) noexcept = default; | ||
| 166 | constexpr BitField& operator=(BitField&&) noexcept = default; | ||
| 169 | 167 | ||
| 170 | constexpr FORCE_INLINE operator T() const { | 168 | constexpr FORCE_INLINE operator T() const { |
| 171 | return Value(); | 169 | return Value(); |
diff --git a/src/core/core.cpp b/src/core/core.cpp index 6dda20faa..eba2177d1 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp | |||
| @@ -36,7 +36,8 @@ | |||
| 36 | #include "frontend/applets/software_keyboard.h" | 36 | #include "frontend/applets/software_keyboard.h" |
| 37 | #include "frontend/applets/web_browser.h" | 37 | #include "frontend/applets/web_browser.h" |
| 38 | #include "video_core/debug_utils/debug_utils.h" | 38 | #include "video_core/debug_utils/debug_utils.h" |
| 39 | #include "video_core/gpu.h" | 39 | #include "video_core/gpu_asynch.h" |
| 40 | #include "video_core/gpu_synch.h" | ||
| 40 | #include "video_core/renderer_base.h" | 41 | #include "video_core/renderer_base.h" |
| 41 | #include "video_core/video_core.h" | 42 | #include "video_core/video_core.h" |
| 42 | 43 | ||
| @@ -129,10 +130,16 @@ struct System::Impl { | |||
| 129 | return ResultStatus::ErrorVideoCore; | 130 | return ResultStatus::ErrorVideoCore; |
| 130 | } | 131 | } |
| 131 | 132 | ||
| 132 | gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer()); | 133 | is_powered_on = true; |
| 134 | |||
| 135 | if (Settings::values.use_asynchronous_gpu_emulation) { | ||
| 136 | gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer); | ||
| 137 | } else { | ||
| 138 | gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer); | ||
| 139 | } | ||
| 133 | 140 | ||
| 134 | cpu_core_manager.Initialize(system); | 141 | cpu_core_manager.Initialize(system); |
| 135 | is_powered_on = true; | 142 | |
| 136 | LOG_DEBUG(Core, "Initialized OK"); | 143 | LOG_DEBUG(Core, "Initialized OK"); |
| 137 | 144 | ||
| 138 | // Reset counters and set time origin to current frame | 145 | // Reset counters and set time origin to current frame |
| @@ -183,13 +190,13 @@ struct System::Impl { | |||
| 183 | 190 | ||
| 184 | void Shutdown() { | 191 | void Shutdown() { |
| 185 | // Log last frame performance stats | 192 | // Log last frame performance stats |
| 186 | auto perf_results = GetAndResetPerfStats(); | 193 | const auto perf_results = GetAndResetPerfStats(); |
| 187 | Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", | 194 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_EmulationSpeed", |
| 188 | perf_results.emulation_speed * 100.0); | 195 | perf_results.emulation_speed * 100.0); |
| 189 | Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", | 196 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Framerate", |
| 190 | perf_results.game_fps); | 197 | perf_results.game_fps); |
| 191 | Telemetry().AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", | 198 | telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime", |
| 192 | perf_results.frametime * 1000.0); | 199 | perf_results.frametime * 1000.0); |
| 193 | 200 | ||
| 194 | is_powered_on = false; | 201 | is_powered_on = false; |
| 195 | 202 | ||
diff --git a/src/core/core.h b/src/core/core.h index d720013f7..ba76a41d8 100644 --- a/src/core/core.h +++ b/src/core/core.h | |||
| @@ -293,10 +293,6 @@ inline ARM_Interface& CurrentArmInterface() { | |||
| 293 | return System::GetInstance().CurrentArmInterface(); | 293 | return System::GetInstance().CurrentArmInterface(); |
| 294 | } | 294 | } |
| 295 | 295 | ||
| 296 | inline TelemetrySession& Telemetry() { | ||
| 297 | return System::GetInstance().TelemetrySession(); | ||
| 298 | } | ||
| 299 | |||
| 300 | inline Kernel::Process* CurrentProcess() { | 296 | inline Kernel::Process* CurrentProcess() { |
| 301 | return System::GetInstance().CurrentProcess(); | 297 | return System::GetInstance().CurrentProcess(); |
| 302 | } | 298 | } |
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 54aa21a3a..1eefed6d0 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #endif | 11 | #endif |
| 12 | #include "core/arm/exclusive_monitor.h" | 12 | #include "core/arm/exclusive_monitor.h" |
| 13 | #include "core/arm/unicorn/arm_unicorn.h" | 13 | #include "core/arm/unicorn/arm_unicorn.h" |
| 14 | #include "core/core.h" | ||
| 14 | #include "core/core_cpu.h" | 15 | #include "core/core_cpu.h" |
| 15 | #include "core/core_timing.h" | 16 | #include "core/core_timing.h" |
| 16 | #include "core/hle/kernel/scheduler.h" | 17 | #include "core/hle/kernel/scheduler.h" |
| @@ -49,9 +50,9 @@ bool CpuBarrier::Rendezvous() { | |||
| 49 | return false; | 50 | return false; |
| 50 | } | 51 | } |
| 51 | 52 | ||
| 52 | Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, | 53 | Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, |
| 53 | CpuBarrier& cpu_barrier, std::size_t core_index) | 54 | std::size_t core_index) |
| 54 | : cpu_barrier{cpu_barrier}, core_timing{core_timing}, core_index{core_index} { | 55 | : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { |
| 55 | if (Settings::values.use_cpu_jit) { | 56 | if (Settings::values.use_cpu_jit) { |
| 56 | #ifdef ARCHITECTURE_x86_64 | 57 | #ifdef ARCHITECTURE_x86_64 |
| 57 | arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index); | 58 | arm_interface = std::make_unique<ARM_Dynarmic>(core_timing, exclusive_monitor, core_index); |
| @@ -63,7 +64,7 @@ Cpu::Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, | |||
| 63 | arm_interface = std::make_unique<ARM_Unicorn>(core_timing); | 64 | arm_interface = std::make_unique<ARM_Unicorn>(core_timing); |
| 64 | } | 65 | } |
| 65 | 66 | ||
| 66 | scheduler = std::make_unique<Kernel::Scheduler>(*arm_interface); | 67 | scheduler = std::make_unique<Kernel::Scheduler>(system, *arm_interface); |
| 67 | } | 68 | } |
| 68 | 69 | ||
| 69 | Cpu::~Cpu() = default; | 70 | Cpu::~Cpu() = default; |
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index e2204c6b0..7589beb8c 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h | |||
| @@ -15,6 +15,10 @@ namespace Kernel { | |||
| 15 | class Scheduler; | 15 | class Scheduler; |
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | namespace Core { | ||
| 19 | class System; | ||
| 20 | } | ||
| 21 | |||
| 18 | namespace Core::Timing { | 22 | namespace Core::Timing { |
| 19 | class CoreTiming; | 23 | class CoreTiming; |
| 20 | } | 24 | } |
| @@ -45,8 +49,8 @@ private: | |||
| 45 | 49 | ||
| 46 | class Cpu { | 50 | class Cpu { |
| 47 | public: | 51 | public: |
| 48 | Cpu(Timing::CoreTiming& core_timing, ExclusiveMonitor& exclusive_monitor, | 52 | Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, |
| 49 | CpuBarrier& cpu_barrier, std::size_t core_index); | 53 | std::size_t core_index); |
| 50 | ~Cpu(); | 54 | ~Cpu(); |
| 51 | 55 | ||
| 52 | void RunLoop(bool tight_loop = true); | 56 | void RunLoop(bool tight_loop = true); |
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp index 2ddb3610d..93bc5619c 100644 --- a/src/core/cpu_core_manager.cpp +++ b/src/core/cpu_core_manager.cpp | |||
| @@ -27,8 +27,7 @@ void CpuCoreManager::Initialize(System& system) { | |||
| 27 | exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); | 27 | exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size()); |
| 28 | 28 | ||
| 29 | for (std::size_t index = 0; index < cores.size(); ++index) { | 29 | for (std::size_t index = 0; index < cores.size(); ++index) { |
| 30 | cores[index] = | 30 | cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index); |
| 31 | std::make_unique<Cpu>(system.CoreTiming(), *exclusive_monitor, *barrier, index); | ||
| 32 | } | 31 | } |
| 33 | 32 | ||
| 34 | // Create threads for CPU cores 1-3, and build thread_to_cpu map | 33 | // Create threads for CPU cores 1-3, and build thread_to_cpu map |
diff --git a/src/core/hle/ipc.h b/src/core/hle/ipc.h index ed84197b3..455d1f346 100644 --- a/src/core/hle/ipc.h +++ b/src/core/hle/ipc.h | |||
| @@ -4,10 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_funcs.h" | ||
| 7 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 8 | #include "common/swap.h" | 10 | #include "common/swap.h" |
| 9 | #include "core/hle/kernel/errors.h" | ||
| 10 | #include "core/memory.h" | ||
| 11 | 11 | ||
| 12 | namespace IPC { | 12 | namespace IPC { |
| 13 | 13 | ||
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h index 90f276ee8..079283830 100644 --- a/src/core/hle/ipc_helpers.h +++ b/src/core/hle/ipc_helpers.h | |||
| @@ -350,7 +350,7 @@ public: | |||
| 350 | template <class T> | 350 | template <class T> |
| 351 | std::shared_ptr<T> PopIpcInterface() { | 351 | std::shared_ptr<T> PopIpcInterface() { |
| 352 | ASSERT(context->Session()->IsDomain()); | 352 | ASSERT(context->Session()->IsDomain()); |
| 353 | ASSERT(context->GetDomainMessageHeader()->input_object_count > 0); | 353 | ASSERT(context->GetDomainMessageHeader().input_object_count > 0); |
| 354 | return context->GetDomainRequestHandler<T>(Pop<u32>() - 1); | 354 | return context->GetDomainRequestHandler<T>(Pop<u32>() - 1); |
| 355 | } | 355 | } |
| 356 | }; | 356 | }; |
diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp index 704e82824..c17baa50a 100644 --- a/src/core/hle/kernel/client_session.cpp +++ b/src/core/hle/kernel/client_session.cpp | |||
| @@ -17,21 +17,11 @@ ClientSession::~ClientSession() { | |||
| 17 | // This destructor will be called automatically when the last ClientSession handle is closed by | 17 | // This destructor will be called automatically when the last ClientSession handle is closed by |
| 18 | // the emulated application. | 18 | // the emulated application. |
| 19 | 19 | ||
| 20 | // Local references to ServerSession and SessionRequestHandler are necessary to guarantee they | 20 | // A local reference to the ServerSession is necessary to guarantee it |
| 21 | // will be kept alive until after ClientDisconnected() returns. | 21 | // will be kept alive until after ClientDisconnected() returns. |
| 22 | SharedPtr<ServerSession> server = parent->server; | 22 | SharedPtr<ServerSession> server = parent->server; |
| 23 | if (server) { | 23 | if (server) { |
| 24 | std::shared_ptr<SessionRequestHandler> hle_handler = server->hle_handler; | 24 | server->ClientDisconnected(); |
| 25 | if (hle_handler) | ||
| 26 | hle_handler->ClientDisconnected(server); | ||
| 27 | |||
| 28 | // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set | ||
| 29 | // their WaitSynchronization result to 0xC920181A. | ||
| 30 | |||
| 31 | // Clean up the list of client threads with pending requests, they are unneeded now that the | ||
| 32 | // client endpoint is closed. | ||
| 33 | server->pending_requesting_threads.clear(); | ||
| 34 | server->currently_handling = nullptr; | ||
| 35 | } | 25 | } |
| 36 | 26 | ||
| 37 | parent->client = nullptr; | 27 | parent->client = nullptr; |
diff --git a/src/core/hle/kernel/client_session.h b/src/core/hle/kernel/client_session.h index 4c18de69c..b1f39aad7 100644 --- a/src/core/hle/kernel/client_session.h +++ b/src/core/hle/kernel/client_session.h | |||
| @@ -36,14 +36,15 @@ public: | |||
| 36 | 36 | ||
| 37 | ResultCode SendSyncRequest(SharedPtr<Thread> thread); | 37 | ResultCode SendSyncRequest(SharedPtr<Thread> thread); |
| 38 | 38 | ||
| 39 | std::string name; ///< Name of client port (optional) | 39 | private: |
| 40 | explicit ClientSession(KernelCore& kernel); | ||
| 41 | ~ClientSession() override; | ||
| 40 | 42 | ||
| 41 | /// The parent session, which links to the server endpoint. | 43 | /// The parent session, which links to the server endpoint. |
| 42 | std::shared_ptr<Session> parent; | 44 | std::shared_ptr<Session> parent; |
| 43 | 45 | ||
| 44 | private: | 46 | /// Name of the client session (optional) |
| 45 | explicit ClientSession(KernelCore& kernel); | 47 | std::string name; |
| 46 | ~ClientSession() override; | ||
| 47 | }; | 48 | }; |
| 48 | 49 | ||
| 49 | } // namespace Kernel | 50 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index 5dd855db8..fe710eb6e 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp | |||
| @@ -86,7 +86,7 @@ HLERequestContext::~HLERequestContext() = default; | |||
| 86 | void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, | 86 | void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_le* src_cmdbuf, |
| 87 | bool incoming) { | 87 | bool incoming) { |
| 88 | IPC::RequestParser rp(src_cmdbuf); | 88 | IPC::RequestParser rp(src_cmdbuf); |
| 89 | command_header = std::make_shared<IPC::CommandHeader>(rp.PopRaw<IPC::CommandHeader>()); | 89 | command_header = rp.PopRaw<IPC::CommandHeader>(); |
| 90 | 90 | ||
| 91 | if (command_header->type == IPC::CommandType::Close) { | 91 | if (command_header->type == IPC::CommandType::Close) { |
| 92 | // Close does not populate the rest of the IPC header | 92 | // Close does not populate the rest of the IPC header |
| @@ -95,8 +95,7 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_ | |||
| 95 | 95 | ||
| 96 | // If handle descriptor is present, add size of it | 96 | // If handle descriptor is present, add size of it |
| 97 | if (command_header->enable_handle_descriptor) { | 97 | if (command_header->enable_handle_descriptor) { |
| 98 | handle_descriptor_header = | 98 | handle_descriptor_header = rp.PopRaw<IPC::HandleDescriptorHeader>(); |
| 99 | std::make_shared<IPC::HandleDescriptorHeader>(rp.PopRaw<IPC::HandleDescriptorHeader>()); | ||
| 100 | if (handle_descriptor_header->send_current_pid) { | 99 | if (handle_descriptor_header->send_current_pid) { |
| 101 | rp.Skip(2, false); | 100 | rp.Skip(2, false); |
| 102 | } | 101 | } |
| @@ -140,16 +139,15 @@ void HLERequestContext::ParseCommandBuffer(const HandleTable& handle_table, u32_ | |||
| 140 | // If this is an incoming message, only CommandType "Request" has a domain header | 139 | // If this is an incoming message, only CommandType "Request" has a domain header |
| 141 | // All outgoing domain messages have the domain header, if only incoming has it | 140 | // All outgoing domain messages have the domain header, if only incoming has it |
| 142 | if (incoming || domain_message_header) { | 141 | if (incoming || domain_message_header) { |
| 143 | domain_message_header = | 142 | domain_message_header = rp.PopRaw<IPC::DomainMessageHeader>(); |
| 144 | std::make_shared<IPC::DomainMessageHeader>(rp.PopRaw<IPC::DomainMessageHeader>()); | ||
| 145 | } else { | 143 | } else { |
| 146 | if (Session()->IsDomain()) | 144 | if (Session()->IsDomain()) { |
| 147 | LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!"); | 145 | LOG_WARNING(IPC, "Domain request has no DomainMessageHeader!"); |
| 146 | } | ||
| 148 | } | 147 | } |
| 149 | } | 148 | } |
| 150 | 149 | ||
| 151 | data_payload_header = | 150 | data_payload_header = rp.PopRaw<IPC::DataPayloadHeader>(); |
| 152 | std::make_shared<IPC::DataPayloadHeader>(rp.PopRaw<IPC::DataPayloadHeader>()); | ||
| 153 | 151 | ||
| 154 | data_payload_offset = rp.GetCurrentOffset(); | 152 | data_payload_offset = rp.GetCurrentOffset(); |
| 155 | 153 | ||
| @@ -264,11 +262,11 @@ ResultCode HLERequestContext::WriteToOutgoingCommandBuffer(Thread& thread) { | |||
| 264 | // Write the domain objects to the command buffer, these go after the raw untranslated data. | 262 | // Write the domain objects to the command buffer, these go after the raw untranslated data. |
| 265 | // TODO(Subv): This completely ignores C buffers. | 263 | // TODO(Subv): This completely ignores C buffers. |
| 266 | std::size_t domain_offset = size - domain_message_header->num_objects; | 264 | std::size_t domain_offset = size - domain_message_header->num_objects; |
| 267 | auto& request_handlers = server_session->domain_request_handlers; | ||
| 268 | 265 | ||
| 269 | for (auto& object : domain_objects) { | 266 | for (const auto& object : domain_objects) { |
| 270 | request_handlers.emplace_back(object); | 267 | server_session->AppendDomainRequestHandler(object); |
| 271 | dst_cmdbuf[domain_offset++] = static_cast<u32_le>(request_handlers.size()); | 268 | dst_cmdbuf[domain_offset++] = |
| 269 | static_cast<u32_le>(server_session->NumDomainRequestHandlers()); | ||
| 272 | } | 270 | } |
| 273 | } | 271 | } |
| 274 | 272 | ||
diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index cb1c5aff3..2bdd9f02c 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <optional> | ||
| 9 | #include <string> | 10 | #include <string> |
| 10 | #include <type_traits> | 11 | #include <type_traits> |
| 11 | #include <vector> | 12 | #include <vector> |
| @@ -15,6 +16,8 @@ | |||
| 15 | #include "core/hle/ipc.h" | 16 | #include "core/hle/ipc.h" |
| 16 | #include "core/hle/kernel/object.h" | 17 | #include "core/hle/kernel/object.h" |
| 17 | 18 | ||
| 19 | union ResultCode; | ||
| 20 | |||
| 18 | namespace Service { | 21 | namespace Service { |
| 19 | class ServiceFrameworkBase; | 22 | class ServiceFrameworkBase; |
| 20 | } | 23 | } |
| @@ -166,12 +169,12 @@ public: | |||
| 166 | return buffer_c_desciptors; | 169 | return buffer_c_desciptors; |
| 167 | } | 170 | } |
| 168 | 171 | ||
| 169 | const IPC::DomainMessageHeader* GetDomainMessageHeader() const { | 172 | const IPC::DomainMessageHeader& GetDomainMessageHeader() const { |
| 170 | return domain_message_header.get(); | 173 | return domain_message_header.value(); |
| 171 | } | 174 | } |
| 172 | 175 | ||
| 173 | bool HasDomainMessageHeader() const { | 176 | bool HasDomainMessageHeader() const { |
| 174 | return domain_message_header != nullptr; | 177 | return domain_message_header.has_value(); |
| 175 | } | 178 | } |
| 176 | 179 | ||
| 177 | /// Helper function to read a buffer using the appropriate buffer descriptor | 180 | /// Helper function to read a buffer using the appropriate buffer descriptor |
| @@ -208,14 +211,12 @@ public: | |||
| 208 | 211 | ||
| 209 | template <typename T> | 212 | template <typename T> |
| 210 | SharedPtr<T> GetCopyObject(std::size_t index) { | 213 | SharedPtr<T> GetCopyObject(std::size_t index) { |
| 211 | ASSERT(index < copy_objects.size()); | 214 | return DynamicObjectCast<T>(copy_objects.at(index)); |
| 212 | return DynamicObjectCast<T>(copy_objects[index]); | ||
| 213 | } | 215 | } |
| 214 | 216 | ||
| 215 | template <typename T> | 217 | template <typename T> |
| 216 | SharedPtr<T> GetMoveObject(std::size_t index) { | 218 | SharedPtr<T> GetMoveObject(std::size_t index) { |
| 217 | ASSERT(index < move_objects.size()); | 219 | return DynamicObjectCast<T>(move_objects.at(index)); |
| 218 | return DynamicObjectCast<T>(move_objects[index]); | ||
| 219 | } | 220 | } |
| 220 | 221 | ||
| 221 | void AddMoveObject(SharedPtr<Object> object) { | 222 | void AddMoveObject(SharedPtr<Object> object) { |
| @@ -232,7 +233,7 @@ public: | |||
| 232 | 233 | ||
| 233 | template <typename T> | 234 | template <typename T> |
| 234 | std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { | 235 | std::shared_ptr<T> GetDomainRequestHandler(std::size_t index) const { |
| 235 | return std::static_pointer_cast<T>(domain_request_handlers[index]); | 236 | return std::static_pointer_cast<T>(domain_request_handlers.at(index)); |
| 236 | } | 237 | } |
| 237 | 238 | ||
| 238 | void SetDomainRequestHandlers( | 239 | void SetDomainRequestHandlers( |
| @@ -272,10 +273,10 @@ private: | |||
| 272 | boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; | 273 | boost::container::small_vector<SharedPtr<Object>, 8> copy_objects; |
| 273 | boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; | 274 | boost::container::small_vector<std::shared_ptr<SessionRequestHandler>, 8> domain_objects; |
| 274 | 275 | ||
| 275 | std::shared_ptr<IPC::CommandHeader> command_header; | 276 | std::optional<IPC::CommandHeader> command_header; |
| 276 | std::shared_ptr<IPC::HandleDescriptorHeader> handle_descriptor_header; | 277 | std::optional<IPC::HandleDescriptorHeader> handle_descriptor_header; |
| 277 | std::shared_ptr<IPC::DataPayloadHeader> data_payload_header; | 278 | std::optional<IPC::DataPayloadHeader> data_payload_header; |
| 278 | std::shared_ptr<IPC::DomainMessageHeader> domain_message_header; | 279 | std::optional<IPC::DomainMessageHeader> domain_message_header; |
| 279 | std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; | 280 | std::vector<IPC::BufferDescriptorX> buffer_x_desciptors; |
| 280 | std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; | 281 | std::vector<IPC::BufferDescriptorABW> buffer_a_desciptors; |
| 281 | std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; | 282 | std::vector<IPC::BufferDescriptorABW> buffer_b_desciptors; |
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 44f30d070..5fccfd9f4 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp | |||
| @@ -19,7 +19,8 @@ namespace Kernel { | |||
| 19 | 19 | ||
| 20 | std::mutex Scheduler::scheduler_mutex; | 20 | std::mutex Scheduler::scheduler_mutex; |
| 21 | 21 | ||
| 22 | Scheduler::Scheduler(Core::ARM_Interface& cpu_core) : cpu_core(cpu_core) {} | 22 | Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core) |
| 23 | : cpu_core{cpu_core}, system{system} {} | ||
| 23 | 24 | ||
| 24 | Scheduler::~Scheduler() { | 25 | Scheduler::~Scheduler() { |
| 25 | for (auto& thread : thread_list) { | 26 | for (auto& thread : thread_list) { |
| @@ -61,7 +62,7 @@ Thread* Scheduler::PopNextReadyThread() { | |||
| 61 | 62 | ||
| 62 | void Scheduler::SwitchContext(Thread* new_thread) { | 63 | void Scheduler::SwitchContext(Thread* new_thread) { |
| 63 | Thread* const previous_thread = GetCurrentThread(); | 64 | Thread* const previous_thread = GetCurrentThread(); |
| 64 | Process* const previous_process = Core::CurrentProcess(); | 65 | Process* const previous_process = system.Kernel().CurrentProcess(); |
| 65 | 66 | ||
| 66 | UpdateLastContextSwitchTime(previous_thread, previous_process); | 67 | UpdateLastContextSwitchTime(previous_thread, previous_process); |
| 67 | 68 | ||
| @@ -94,8 +95,8 @@ void Scheduler::SwitchContext(Thread* new_thread) { | |||
| 94 | 95 | ||
| 95 | auto* const thread_owner_process = current_thread->GetOwnerProcess(); | 96 | auto* const thread_owner_process = current_thread->GetOwnerProcess(); |
| 96 | if (previous_process != thread_owner_process) { | 97 | if (previous_process != thread_owner_process) { |
| 97 | Core::System::GetInstance().Kernel().MakeCurrentProcess(thread_owner_process); | 98 | system.Kernel().MakeCurrentProcess(thread_owner_process); |
| 98 | SetCurrentPageTable(&Core::CurrentProcess()->VMManager().page_table); | 99 | SetCurrentPageTable(&thread_owner_process->VMManager().page_table); |
| 99 | } | 100 | } |
| 100 | 101 | ||
| 101 | cpu_core.LoadContext(new_thread->GetContext()); | 102 | cpu_core.LoadContext(new_thread->GetContext()); |
| @@ -111,7 +112,7 @@ void Scheduler::SwitchContext(Thread* new_thread) { | |||
| 111 | 112 | ||
| 112 | void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { | 113 | void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { |
| 113 | const u64 prev_switch_ticks = last_context_switch_time; | 114 | const u64 prev_switch_ticks = last_context_switch_time; |
| 114 | const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); | 115 | const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); |
| 115 | const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; | 116 | const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; |
| 116 | 117 | ||
| 117 | if (thread != nullptr) { | 118 | if (thread != nullptr) { |
| @@ -223,8 +224,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) { | |||
| 223 | // Take the first non-nullptr one | 224 | // Take the first non-nullptr one |
| 224 | for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { | 225 | for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { |
| 225 | const auto res = | 226 | const auto res = |
| 226 | Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread( | 227 | system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority); |
| 227 | core, priority); | ||
| 228 | 228 | ||
| 229 | // If scheduler provides a suggested thread | 229 | // If scheduler provides a suggested thread |
| 230 | if (res != nullptr) { | 230 | if (res != nullptr) { |
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 97ced4dfc..1c5bf57d9 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h | |||
| @@ -13,7 +13,8 @@ | |||
| 13 | 13 | ||
| 14 | namespace Core { | 14 | namespace Core { |
| 15 | class ARM_Interface; | 15 | class ARM_Interface; |
| 16 | } | 16 | class System; |
| 17 | } // namespace Core | ||
| 17 | 18 | ||
| 18 | namespace Kernel { | 19 | namespace Kernel { |
| 19 | 20 | ||
| @@ -21,7 +22,7 @@ class Process; | |||
| 21 | 22 | ||
| 22 | class Scheduler final { | 23 | class Scheduler final { |
| 23 | public: | 24 | public: |
| 24 | explicit Scheduler(Core::ARM_Interface& cpu_core); | 25 | explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core); |
| 25 | ~Scheduler(); | 26 | ~Scheduler(); |
| 26 | 27 | ||
| 27 | /// Returns whether there are any threads that are ready to run. | 28 | /// Returns whether there are any threads that are ready to run. |
| @@ -162,6 +163,7 @@ private: | |||
| 162 | Core::ARM_Interface& cpu_core; | 163 | Core::ARM_Interface& cpu_core; |
| 163 | u64 last_context_switch_time = 0; | 164 | u64 last_context_switch_time = 0; |
| 164 | 165 | ||
| 166 | Core::System& system; | ||
| 165 | static std::mutex scheduler_mutex; | 167 | static std::mutex scheduler_mutex; |
| 166 | }; | 168 | }; |
| 167 | 169 | ||
diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index 027434f92..4d8a337a7 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp | |||
| @@ -63,42 +63,71 @@ void ServerSession::Acquire(Thread* thread) { | |||
| 63 | pending_requesting_threads.pop_back(); | 63 | pending_requesting_threads.pop_back(); |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | void ServerSession::ClientDisconnected() { | ||
| 67 | // We keep a shared pointer to the hle handler to keep it alive throughout | ||
| 68 | // the call to ClientDisconnected, as ClientDisconnected invalidates the | ||
| 69 | // hle_handler member itself during the course of the function executing. | ||
| 70 | std::shared_ptr<SessionRequestHandler> handler = hle_handler; | ||
| 71 | if (handler) { | ||
| 72 | // Note that after this returns, this server session's hle_handler is | ||
| 73 | // invalidated (set to null). | ||
| 74 | handler->ClientDisconnected(this); | ||
| 75 | } | ||
| 76 | |||
| 77 | // TODO(Subv): Force a wake up of all the ServerSession's waiting threads and set | ||
| 78 | // their WaitSynchronization result to 0xC920181A. | ||
| 79 | |||
| 80 | // Clean up the list of client threads with pending requests, they are unneeded now that the | ||
| 81 | // client endpoint is closed. | ||
| 82 | pending_requesting_threads.clear(); | ||
| 83 | currently_handling = nullptr; | ||
| 84 | } | ||
| 85 | |||
| 86 | void ServerSession::AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler) { | ||
| 87 | domain_request_handlers.push_back(std::move(handler)); | ||
| 88 | } | ||
| 89 | |||
| 90 | std::size_t ServerSession::NumDomainRequestHandlers() const { | ||
| 91 | return domain_request_handlers.size(); | ||
| 92 | } | ||
| 93 | |||
| 66 | ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { | 94 | ResultCode ServerSession::HandleDomainSyncRequest(Kernel::HLERequestContext& context) { |
| 67 | auto* const domain_message_header = context.GetDomainMessageHeader(); | 95 | if (!context.HasDomainMessageHeader()) { |
| 68 | if (domain_message_header) { | 96 | return RESULT_SUCCESS; |
| 69 | // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs | 97 | } |
| 70 | context.SetDomainRequestHandlers(domain_request_handlers); | 98 | |
| 71 | 99 | // Set domain handlers in HLE context, used for domain objects (IPC interfaces) as inputs | |
| 72 | // If there is a DomainMessageHeader, then this is CommandType "Request" | 100 | context.SetDomainRequestHandlers(domain_request_handlers); |
| 73 | const u32 object_id{context.GetDomainMessageHeader()->object_id}; | 101 | |
| 74 | switch (domain_message_header->command) { | 102 | // If there is a DomainMessageHeader, then this is CommandType "Request" |
| 75 | case IPC::DomainMessageHeader::CommandType::SendMessage: | 103 | const auto& domain_message_header = context.GetDomainMessageHeader(); |
| 76 | if (object_id > domain_request_handlers.size()) { | 104 | const u32 object_id{domain_message_header.object_id}; |
| 77 | LOG_CRITICAL(IPC, | 105 | switch (domain_message_header.command) { |
| 78 | "object_id {} is too big! This probably means a recent service call " | 106 | case IPC::DomainMessageHeader::CommandType::SendMessage: |
| 79 | "to {} needed to return a new interface!", | 107 | if (object_id > domain_request_handlers.size()) { |
| 80 | object_id, name); | 108 | LOG_CRITICAL(IPC, |
| 81 | UNREACHABLE(); | 109 | "object_id {} is too big! This probably means a recent service call " |
| 82 | return RESULT_SUCCESS; // Ignore error if asserts are off | 110 | "to {} needed to return a new interface!", |
| 83 | } | 111 | object_id, name); |
| 84 | return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); | 112 | UNREACHABLE(); |
| 85 | 113 | return RESULT_SUCCESS; // Ignore error if asserts are off | |
| 86 | case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: { | ||
| 87 | LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id); | ||
| 88 | |||
| 89 | domain_request_handlers[object_id - 1] = nullptr; | ||
| 90 | |||
| 91 | IPC::ResponseBuilder rb{context, 2}; | ||
| 92 | rb.Push(RESULT_SUCCESS); | ||
| 93 | return RESULT_SUCCESS; | ||
| 94 | } | ||
| 95 | } | 114 | } |
| 115 | return domain_request_handlers[object_id - 1]->HandleSyncRequest(context); | ||
| 96 | 116 | ||
| 97 | LOG_CRITICAL(IPC, "Unknown domain command={}", | 117 | case IPC::DomainMessageHeader::CommandType::CloseVirtualHandle: { |
| 98 | static_cast<int>(domain_message_header->command.Value())); | 118 | LOG_DEBUG(IPC, "CloseVirtualHandle, object_id=0x{:08X}", object_id); |
| 99 | ASSERT(false); | 119 | |
| 120 | domain_request_handlers[object_id - 1] = nullptr; | ||
| 121 | |||
| 122 | IPC::ResponseBuilder rb{context, 2}; | ||
| 123 | rb.Push(RESULT_SUCCESS); | ||
| 124 | return RESULT_SUCCESS; | ||
| 125 | } | ||
| 100 | } | 126 | } |
| 101 | 127 | ||
| 128 | LOG_CRITICAL(IPC, "Unknown domain command={}", | ||
| 129 | static_cast<int>(domain_message_header.command.Value())); | ||
| 130 | ASSERT(false); | ||
| 102 | return RESULT_SUCCESS; | 131 | return RESULT_SUCCESS; |
| 103 | } | 132 | } |
| 104 | 133 | ||
diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index e0e9d64c8..aea4ccfeb 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h | |||
| @@ -46,6 +46,14 @@ public: | |||
| 46 | return HANDLE_TYPE; | 46 | return HANDLE_TYPE; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | Session* GetParent() { | ||
| 50 | return parent.get(); | ||
| 51 | } | ||
| 52 | |||
| 53 | const Session* GetParent() const { | ||
| 54 | return parent.get(); | ||
| 55 | } | ||
| 56 | |||
| 49 | using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; | 57 | using SessionPair = std::tuple<SharedPtr<ServerSession>, SharedPtr<ClientSession>>; |
| 50 | 58 | ||
| 51 | /** | 59 | /** |
| @@ -78,23 +86,16 @@ public: | |||
| 78 | 86 | ||
| 79 | void Acquire(Thread* thread) override; | 87 | void Acquire(Thread* thread) override; |
| 80 | 88 | ||
| 81 | std::string name; ///< The name of this session (optional) | 89 | /// Called when a client disconnection occurs. |
| 82 | std::shared_ptr<Session> parent; ///< The parent session, which links to the client endpoint. | 90 | void ClientDisconnected(); |
| 83 | std::shared_ptr<SessionRequestHandler> | ||
| 84 | hle_handler; ///< This session's HLE request handler (applicable when not a domain) | ||
| 85 | 91 | ||
| 86 | /// This is the list of domain request handlers (after conversion to a domain) | 92 | /// Adds a new domain request handler to the collection of request handlers within |
| 87 | std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; | 93 | /// this ServerSession instance. |
| 88 | 94 | void AppendDomainRequestHandler(std::shared_ptr<SessionRequestHandler> handler); | |
| 89 | /// List of threads that are pending a response after a sync request. This list is processed in | ||
| 90 | /// a LIFO manner, thus, the last request will be dispatched first. | ||
| 91 | /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test. | ||
| 92 | std::vector<SharedPtr<Thread>> pending_requesting_threads; | ||
| 93 | 95 | ||
| 94 | /// Thread whose request is currently being handled. A request is considered "handled" when a | 96 | /// Retrieves the total number of domain request handlers that have been |
| 95 | /// response is sent via svcReplyAndReceive. | 97 | /// appended to this ServerSession instance. |
| 96 | /// TODO(Subv): Find a better name for this. | 98 | std::size_t NumDomainRequestHandlers() const; |
| 97 | SharedPtr<Thread> currently_handling; | ||
| 98 | 99 | ||
| 99 | /// Returns true if the session has been converted to a domain, otherwise False | 100 | /// Returns true if the session has been converted to a domain, otherwise False |
| 100 | bool IsDomain() const { | 101 | bool IsDomain() const { |
| @@ -129,8 +130,30 @@ private: | |||
| 129 | /// object handle. | 130 | /// object handle. |
| 130 | ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); | 131 | ResultCode HandleDomainSyncRequest(Kernel::HLERequestContext& context); |
| 131 | 132 | ||
| 133 | /// The parent session, which links to the client endpoint. | ||
| 134 | std::shared_ptr<Session> parent; | ||
| 135 | |||
| 136 | /// This session's HLE request handler (applicable when not a domain) | ||
| 137 | std::shared_ptr<SessionRequestHandler> hle_handler; | ||
| 138 | |||
| 139 | /// This is the list of domain request handlers (after conversion to a domain) | ||
| 140 | std::vector<std::shared_ptr<SessionRequestHandler>> domain_request_handlers; | ||
| 141 | |||
| 142 | /// List of threads that are pending a response after a sync request. This list is processed in | ||
| 143 | /// a LIFO manner, thus, the last request will be dispatched first. | ||
| 144 | /// TODO(Subv): Verify if this is indeed processed in LIFO using a hardware test. | ||
| 145 | std::vector<SharedPtr<Thread>> pending_requesting_threads; | ||
| 146 | |||
| 147 | /// Thread whose request is currently being handled. A request is considered "handled" when a | ||
| 148 | /// response is sent via svcReplyAndReceive. | ||
| 149 | /// TODO(Subv): Find a better name for this. | ||
| 150 | SharedPtr<Thread> currently_handling; | ||
| 151 | |||
| 132 | /// When set to True, converts the session to a domain at the end of the command | 152 | /// When set to True, converts the session to a domain at the end of the command |
| 133 | bool convert_to_domain{}; | 153 | bool convert_to_domain{}; |
| 154 | |||
| 155 | /// The name of this session (optional) | ||
| 156 | std::string name; | ||
| 134 | }; | 157 | }; |
| 135 | 158 | ||
| 136 | } // namespace Kernel | 159 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index 22d0c1dd5..62861da36 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "core/core.h" | ||
| 10 | #include "core/hle/kernel/errors.h" | 9 | #include "core/hle/kernel/errors.h" |
| 11 | #include "core/hle/kernel/kernel.h" | 10 | #include "core/hle/kernel/kernel.h" |
| 12 | #include "core/hle/kernel/shared_memory.h" | 11 | #include "core/hle/kernel/shared_memory.h" |
| @@ -34,8 +33,8 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_ | |||
| 34 | shared_memory->backing_block_offset = 0; | 33 | shared_memory->backing_block_offset = 0; |
| 35 | 34 | ||
| 36 | // Refresh the address mappings for the current process. | 35 | // Refresh the address mappings for the current process. |
| 37 | if (Core::CurrentProcess() != nullptr) { | 36 | if (kernel.CurrentProcess() != nullptr) { |
| 38 | Core::CurrentProcess()->VMManager().RefreshMemoryBlockMappings( | 37 | kernel.CurrentProcess()->VMManager().RefreshMemoryBlockMappings( |
| 39 | shared_memory->backing_block.get()); | 38 | shared_memory->backing_block.get()); |
| 40 | } | 39 | } |
| 41 | } else { | 40 | } else { |
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 75b88a333..7f5c0cc86 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "core/hle/kernel/address_arbiter.h" | 20 | #include "core/hle/kernel/address_arbiter.h" |
| 21 | #include "core/hle/kernel/client_port.h" | 21 | #include "core/hle/kernel/client_port.h" |
| 22 | #include "core/hle/kernel/client_session.h" | 22 | #include "core/hle/kernel/client_session.h" |
| 23 | #include "core/hle/kernel/errors.h" | ||
| 23 | #include "core/hle/kernel/handle_table.h" | 24 | #include "core/hle/kernel/handle_table.h" |
| 24 | #include "core/hle/kernel/kernel.h" | 25 | #include "core/hle/kernel/kernel.h" |
| 25 | #include "core/hle/kernel/mutex.h" | 26 | #include "core/hle/kernel/mutex.h" |
diff --git a/src/core/hle/result.h b/src/core/hle/result.h index bfb77cc31..1ed144481 100644 --- a/src/core/hle/result.h +++ b/src/core/hle/result.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | ||
| 12 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 13 | 12 | ||
| 14 | // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes | 13 | // All the constants in this file come from http://switchbrew.org/index.php?title=Error_codes |
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp index f255f74b5..8c5bd6059 100644 --- a/src/core/hle/service/am/applets/software_keyboard.cpp +++ b/src/core/hle/service/am/applets/software_keyboard.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "common/string_util.h" | 7 | #include "common/string_util.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/frontend/applets/software_keyboard.h" | 9 | #include "core/frontend/applets/software_keyboard.h" |
| 10 | #include "core/hle/result.h" | ||
| 10 | #include "core/hle/service/am/am.h" | 11 | #include "core/hle/service/am/am.h" |
| 11 | #include "core/hle/service/am/applets/software_keyboard.h" | 12 | #include "core/hle/service/am/applets/software_keyboard.h" |
| 12 | 13 | ||
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h index efd5753a1..b93a30d28 100644 --- a/src/core/hle/service/am/applets/software_keyboard.h +++ b/src/core/hle/service/am/applets/software_keyboard.h | |||
| @@ -9,10 +9,13 @@ | |||
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | ||
| 12 | #include "common/swap.h" | 13 | #include "common/swap.h" |
| 13 | #include "core/hle/service/am/am.h" | 14 | #include "core/hle/service/am/am.h" |
| 14 | #include "core/hle/service/am/applets/applets.h" | 15 | #include "core/hle/service/am/applets/applets.h" |
| 15 | 16 | ||
| 17 | union ResultCode; | ||
| 18 | |||
| 16 | namespace Service::AM::Applets { | 19 | namespace Service::AM::Applets { |
| 17 | 20 | ||
| 18 | enum class KeysetDisable : u32 { | 21 | enum class KeysetDisable : u32 { |
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp index bbe813490..21f5e64c7 100644 --- a/src/core/hle/service/audio/audout_u.cpp +++ b/src/core/hle/service/audio/audout_u.cpp | |||
| @@ -107,7 +107,9 @@ private: | |||
| 107 | void StopAudioOut(Kernel::HLERequestContext& ctx) { | 107 | void StopAudioOut(Kernel::HLERequestContext& ctx) { |
| 108 | LOG_DEBUG(Service_Audio, "called"); | 108 | LOG_DEBUG(Service_Audio, "called"); |
| 109 | 109 | ||
| 110 | audio_core.StopStream(stream); | 110 | if (stream->IsPlaying()) { |
| 111 | audio_core.StopStream(stream); | ||
| 112 | } | ||
| 111 | 113 | ||
| 112 | IPC::ResponseBuilder rb{ctx, 2}; | 114 | IPC::ResponseBuilder rb{ctx, 2}; |
| 113 | rb.Push(RESULT_SUCCESS); | 115 | rb.Push(RESULT_SUCCESS); |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index dbe7ee6e8..20c7c39aa 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 | |||
| 36 | 36 | ||
| 37 | auto& instance = Core::System::GetInstance(); | 37 | auto& instance = Core::System::GetInstance(); |
| 38 | instance.GetPerfStats().EndGameFrame(); | 38 | instance.GetPerfStats().EndGameFrame(); |
| 39 | instance.Renderer().SwapBuffers(framebuffer); | 39 | instance.GPU().SwapBuffers(framebuffer); |
| 40 | } | 40 | } |
| 41 | 41 | ||
| 42 | } // namespace Service::Nvidia::Devices | 42 | } // namespace Service::Nvidia::Devices |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 466db7ccd..a34b9e753 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 178 | auto& gpu = system_instance.GPU(); | 178 | auto& gpu = system_instance.GPU(); |
| 179 | auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); | 179 | auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); |
| 180 | ASSERT(cpu_addr); | 180 | ASSERT(cpu_addr); |
| 181 | system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); | 181 | gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size); |
| 182 | 182 | ||
| 183 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); | 183 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); |
| 184 | 184 | ||
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 0a650f36c..8ce7bc7a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | |||
| @@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< | |||
| 136 | return 0; | 136 | return 0; |
| 137 | } | 137 | } |
| 138 | 138 | ||
| 139 | static void PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 140 | if (entries.empty()) { | ||
| 141 | return; | ||
| 142 | } | ||
| 143 | |||
| 144 | auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()}; | ||
| 145 | dma_pusher.Push(std::move(entries)); | ||
| 146 | dma_pusher.DispatchCalls(); | ||
| 147 | } | ||
| 148 | |||
| 149 | u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { | 139 | u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { |
| 150 | if (input.size() < sizeof(IoctlSubmitGpfifo)) { | 140 | if (input.size() < sizeof(IoctlSubmitGpfifo)) { |
| 151 | UNIMPLEMENTED(); | 141 | UNIMPLEMENTED(); |
| @@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp | |||
| 163 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], | 153 | std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], |
| 164 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 154 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 165 | 155 | ||
| 166 | PushGPUEntries(std::move(entries)); | 156 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); |
| 167 | 157 | ||
| 168 | params.fence_out.id = 0; | 158 | params.fence_out.id = 0; |
| 169 | params.fence_out.value = 0; | 159 | params.fence_out.value = 0; |
| @@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) | |||
| 184 | Memory::ReadBlock(params.address, entries.data(), | 174 | Memory::ReadBlock(params.address, entries.data(), |
| 185 | params.num_entries * sizeof(Tegra::CommandListHeader)); | 175 | params.num_entries * sizeof(Tegra::CommandListHeader)); |
| 186 | 176 | ||
| 187 | PushGPUEntries(std::move(entries)); | 177 | Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); |
| 188 | 178 | ||
| 189 | params.fence_out.id = 0; | 179 | params.fence_out.id = 0; |
| 190 | params.fence_out.value = 0; | 180 | params.fence_out.value = 0; |
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 56f31e2ac..fc496b654 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -186,7 +186,7 @@ void NVFlinger::Compose() { | |||
| 186 | 186 | ||
| 187 | // There was no queued buffer to draw, render previous frame | 187 | // There was no queued buffer to draw, render previous frame |
| 188 | system_instance.GetPerfStats().EndGameFrame(); | 188 | system_instance.GetPerfStats().EndGameFrame(); |
| 189 | system_instance.Renderer().SwapBuffers({}); | 189 | system_instance.GPU().SwapBuffers({}); |
| 190 | continue; | 190 | continue; |
| 191 | } | 191 | } |
| 192 | 192 | ||
diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp index 74da4d5e6..e9ee73710 100644 --- a/src/core/hle/service/sm/controller.cpp +++ b/src/core/hle/service/sm/controller.cpp | |||
| @@ -30,7 +30,7 @@ void Controller::DuplicateSession(Kernel::HLERequestContext& ctx) { | |||
| 30 | 30 | ||
| 31 | IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; | 31 | IPC::ResponseBuilder rb{ctx, 2, 0, 1, IPC::ResponseBuilder::Flags::AlwaysMoveHandles}; |
| 32 | rb.Push(RESULT_SUCCESS); | 32 | rb.Push(RESULT_SUCCESS); |
| 33 | Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->parent->client}; | 33 | Kernel::SharedPtr<Kernel::ClientSession> session{ctx.Session()->GetParent()->client}; |
| 34 | rb.PushMoveObjects(session); | 34 | rb.PushMoveObjects(session); |
| 35 | 35 | ||
| 36 | LOG_DEBUG(Service, "session={}", session->GetObjectId()); | 36 | LOG_DEBUG(Service, "session={}", session->GetObjectId()); |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index ec279cef8..6591c45d2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -356,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { | |||
| 356 | const VAddr overlap_end = std::min(end, region_end); | 356 | const VAddr overlap_end = std::min(end, region_end); |
| 357 | const VAddr overlap_size = overlap_end - overlap_start; | 357 | const VAddr overlap_size = overlap_end - overlap_start; |
| 358 | 358 | ||
| 359 | auto& rasterizer = system_instance.Renderer().Rasterizer(); | 359 | auto& gpu = system_instance.GPU(); |
| 360 | switch (mode) { | 360 | switch (mode) { |
| 361 | case FlushMode::Flush: | 361 | case FlushMode::Flush: |
| 362 | rasterizer.FlushRegion(overlap_start, overlap_size); | 362 | gpu.FlushRegion(overlap_start, overlap_size); |
| 363 | break; | 363 | break; |
| 364 | case FlushMode::Invalidate: | 364 | case FlushMode::Invalidate: |
| 365 | rasterizer.InvalidateRegion(overlap_start, overlap_size); | 365 | gpu.InvalidateRegion(overlap_start, overlap_size); |
| 366 | break; | 366 | break; |
| 367 | case FlushMode::FlushAndInvalidate: | 367 | case FlushMode::FlushAndInvalidate: |
| 368 | rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); | 368 | gpu.FlushAndInvalidateRegion(overlap_start, overlap_size); |
| 369 | break; | 369 | break; |
| 370 | } | 370 | } |
| 371 | }; | 371 | }; |
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 2e232e1e7..6dd3139cc 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -91,7 +91,10 @@ void LogSettings() { | |||
| 91 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); | 91 | LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); |
| 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
| 93 | LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); | 93 | LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); |
| 94 | LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); | ||
| 94 | LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); | 95 | LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); |
| 96 | LogSetting("Renderer_UseAsynchronousGpuEmulation", | ||
| 97 | Settings::values.use_asynchronous_gpu_emulation); | ||
| 95 | LogSetting("Audio_OutputEngine", Settings::values.sink_id); | 98 | LogSetting("Audio_OutputEngine", Settings::values.sink_id); |
| 96 | LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); | 99 | LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching); |
| 97 | LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); | 100 | LogSetting("Audio_OutputDevice", Settings::values.audio_device_id); |
diff --git a/src/core/settings.h b/src/core/settings.h index 7e76e0466..cdfb2f742 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -393,6 +393,7 @@ struct Values { | |||
| 393 | u16 frame_limit; | 393 | u16 frame_limit; |
| 394 | bool use_disk_shader_cache; | 394 | bool use_disk_shader_cache; |
| 395 | bool use_accurate_gpu_emulation; | 395 | bool use_accurate_gpu_emulation; |
| 396 | bool use_asynchronous_gpu_emulation; | ||
| 396 | 397 | ||
| 397 | float bg_red; | 398 | float bg_red; |
| 398 | float bg_green; | 399 | float bg_green; |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 58dfcc4df..e1db06811 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() { | |||
| 162 | Settings::values.use_disk_shader_cache); | 162 | Settings::values.use_disk_shader_cache); |
| 163 | AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", | 163 | AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", |
| 164 | Settings::values.use_accurate_gpu_emulation); | 164 | Settings::values.use_accurate_gpu_emulation); |
| 165 | AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation", | ||
| 166 | Settings::values.use_asynchronous_gpu_emulation); | ||
| 165 | AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", | 167 | AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", |
| 166 | Settings::values.use_docked_mode); | 168 | Settings::values.use_docked_mode); |
| 167 | } | 169 | } |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3e9d2b3be..0c3038c52 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -17,6 +17,12 @@ add_library(video_core STATIC | |||
| 17 | engines/shader_header.h | 17 | engines/shader_header.h |
| 18 | gpu.cpp | 18 | gpu.cpp |
| 19 | gpu.h | 19 | gpu.h |
| 20 | gpu_asynch.cpp | ||
| 21 | gpu_asynch.h | ||
| 22 | gpu_synch.cpp | ||
| 23 | gpu_synch.h | ||
| 24 | gpu_thread.cpp | ||
| 25 | gpu_thread.h | ||
| 20 | macro_interpreter.cpp | 26 | macro_interpreter.cpp |
| 21 | macro_interpreter.h | 27 | macro_interpreter.h |
| 22 | memory_manager.cpp | 28 | memory_manager.cpp |
| @@ -74,6 +80,7 @@ add_library(video_core STATIC | |||
| 74 | shader/decode/hfma2.cpp | 80 | shader/decode/hfma2.cpp |
| 75 | shader/decode/conversion.cpp | 81 | shader/decode/conversion.cpp |
| 76 | shader/decode/memory.cpp | 82 | shader/decode/memory.cpp |
| 83 | shader/decode/texture.cpp | ||
| 77 | shader/decode/float_set_predicate.cpp | 84 | shader/decode/float_set_predicate.cpp |
| 78 | shader/decode/integer_set_predicate.cpp | 85 | shader/decode/integer_set_predicate.cpp |
| 79 | shader/decode/half_set_predicate.cpp | 86 | shader/decode/half_set_predicate.cpp |
| @@ -94,6 +101,8 @@ add_library(video_core STATIC | |||
| 94 | surface.h | 101 | surface.h |
| 95 | textures/astc.cpp | 102 | textures/astc.cpp |
| 96 | textures/astc.h | 103 | textures/astc.h |
| 104 | textures/convert.cpp | ||
| 105 | textures/convert.h | ||
| 97 | textures/decoders.cpp | 106 | textures/decoders.cpp |
| 98 | textures/decoders.h | 107 | textures/decoders.h |
| 99 | textures/texture.h | 108 | textures/texture.h |
| @@ -104,6 +113,8 @@ add_library(video_core STATIC | |||
| 104 | if (ENABLE_VULKAN) | 113 | if (ENABLE_VULKAN) |
| 105 | target_sources(video_core PRIVATE | 114 | target_sources(video_core PRIVATE |
| 106 | renderer_vulkan/declarations.h | 115 | renderer_vulkan/declarations.h |
| 116 | renderer_vulkan/maxwell_to_vk.cpp | ||
| 117 | renderer_vulkan/maxwell_to_vk.h | ||
| 107 | renderer_vulkan/vk_buffer_cache.cpp | 118 | renderer_vulkan/vk_buffer_cache.cpp |
| 108 | renderer_vulkan/vk_buffer_cache.h | 119 | renderer_vulkan/vk_buffer_cache.h |
| 109 | renderer_vulkan/vk_device.cpp | 120 | renderer_vulkan/vk_device.cpp |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 669541b4b..bff1a37ff 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -39,7 +39,7 @@ bool DmaPusher::Step() { | |||
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | const CommandList& command_list{dma_pushbuffer.front()}; | 41 | const CommandList& command_list{dma_pushbuffer.front()}; |
| 42 | const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; | 42 | const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; |
| 43 | GPUVAddr dma_get = command_list_header.addr; | 43 | GPUVAddr dma_get = command_list_header.addr; |
| 44 | GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); | 44 | GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); |
| 45 | bool non_main = command_list_header.is_non_main; | 45 | bool non_main = command_list_header.is_non_main; |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 4f6126116..aae2a4019 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 48 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. | 48 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. |
| 49 | // We do this before actually writing the new data because the destination address might contain | 49 | // We do this before actually writing the new data because the destination address might contain |
| 50 | // a dirty surface that will have to be written back to memory. | 50 | // a dirty surface that will have to be written back to memory. |
| 51 | rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); | 51 | Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); |
| 52 | 52 | ||
| 53 | Memory::Write32(*dest_address, data); | 53 | Memory::Write32(*dest_address, data); |
| 54 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 54 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0474c7ba3..9dfea5999 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() { | |||
| 92 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | 92 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { |
| 93 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated | 93 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated |
| 94 | // copying. | 94 | // copying. |
| 95 | rasterizer.FlushRegion(*source_cpu, src_size); | 95 | Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); |
| 96 | 96 | ||
| 97 | // We have to invalidate the destination region to evict any outdated surfaces from the | 97 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 98 | // cache. We do this before actually writing the new data because the destination address | 98 | // cache. We do this before actually writing the new data because the destination address |
| 99 | // might contain a dirty surface that will have to be written back to memory. | 99 | // might contain a dirty surface that will have to be written back to memory. |
| 100 | rasterizer.InvalidateRegion(*dest_cpu, dst_size); | 100 | Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); |
| 101 | }; | 101 | }; |
| 102 | 102 | ||
| 103 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 103 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c7eb15b6a..7f613370b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -324,11 +324,11 @@ enum class TextureQueryType : u64 { | |||
| 324 | 324 | ||
| 325 | enum class TextureProcessMode : u64 { | 325 | enum class TextureProcessMode : u64 { |
| 326 | None = 0, | 326 | None = 0, |
| 327 | LZ = 1, // Unknown, appears to be the same as none. | 327 | LZ = 1, // Load LOD of zero. |
| 328 | LB = 2, // Load Bias. | 328 | LB = 2, // Load Bias. |
| 329 | LL = 3, // Load LOD (LevelOfDetail) | 329 | LL = 3, // Load LOD. |
| 330 | LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB | 330 | LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. |
| 331 | LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL | 331 | LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. |
| 332 | }; | 332 | }; |
| 333 | 333 | ||
| 334 | enum class TextureMiscMode : u64 { | 334 | enum class TextureMiscMode : u64 { |
| @@ -1445,6 +1445,7 @@ public: | |||
| 1445 | Flow, | 1445 | Flow, |
| 1446 | Synch, | 1446 | Synch, |
| 1447 | Memory, | 1447 | Memory, |
| 1448 | Texture, | ||
| 1448 | FloatSet, | 1449 | FloatSet, |
| 1449 | FloatSetPredicate, | 1450 | FloatSetPredicate, |
| 1450 | IntegerSet, | 1451 | IntegerSet, |
| @@ -1575,14 +1576,14 @@ private: | |||
| 1575 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | 1576 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), |
| 1576 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), | 1577 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), |
| 1577 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | 1578 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), |
| 1578 | INST("110000----111---", Id::TEX, Type::Memory, "TEX"), | 1579 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), |
| 1579 | INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), | 1580 | INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), |
| 1580 | INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), | 1581 | INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), |
| 1581 | INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), | 1582 | INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), |
| 1582 | INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), | 1583 | INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), |
| 1583 | INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), | 1584 | INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), |
| 1584 | INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), | 1585 | INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), |
| 1585 | INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), | 1586 | INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), |
| 1586 | INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), | 1587 | INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), |
| 1587 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), | 1588 | INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), |
| 1588 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), | 1589 | INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ac30d1a89..08abf8ac9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -12,7 +12,7 @@ | |||
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/engines/maxwell_dma.h" | 13 | #include "video_core/engines/maxwell_dma.h" |
| 14 | #include "video_core/gpu.h" | 14 | #include "video_core/gpu.h" |
| 15 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/renderer_base.h" |
| 16 | 16 | ||
| 17 | namespace Tegra { | 17 | namespace Tegra { |
| 18 | 18 | ||
| @@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { | |||
| 28 | UNREACHABLE(); | 28 | UNREACHABLE(); |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { | 31 | GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { |
| 32 | auto& rasterizer{renderer.Rasterizer()}; | ||
| 32 | memory_manager = std::make_unique<Tegra::MemoryManager>(); | 33 | memory_manager = std::make_unique<Tegra::MemoryManager>(); |
| 33 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); | 34 | dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); |
| 34 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); | 35 | maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 6313702f2..56a203275 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -16,8 +16,8 @@ class System; | |||
| 16 | } | 16 | } |
| 17 | 17 | ||
| 18 | namespace VideoCore { | 18 | namespace VideoCore { |
| 19 | class RasterizerInterface; | 19 | class RendererBase; |
| 20 | } | 20 | } // namespace VideoCore |
| 21 | 21 | ||
| 22 | namespace Tegra { | 22 | namespace Tegra { |
| 23 | 23 | ||
| @@ -119,10 +119,11 @@ enum class EngineID { | |||
| 119 | MAXWELL_DMA_COPY_A = 0xB0B5, | 119 | MAXWELL_DMA_COPY_A = 0xB0B5, |
| 120 | }; | 120 | }; |
| 121 | 121 | ||
| 122 | class GPU final { | 122 | class GPU { |
| 123 | public: | 123 | public: |
| 124 | explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); | 124 | explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); |
| 125 | ~GPU(); | 125 | |
| 126 | virtual ~GPU(); | ||
| 126 | 127 | ||
| 127 | struct MethodCall { | 128 | struct MethodCall { |
| 128 | u32 method{}; | 129 | u32 method{}; |
| @@ -200,8 +201,42 @@ public: | |||
| 200 | }; | 201 | }; |
| 201 | } regs{}; | 202 | } regs{}; |
| 202 | 203 | ||
| 204 | /// Push GPU command entries to be processed | ||
| 205 | virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; | ||
| 206 | |||
| 207 | /// Swap buffers (render frame) | ||
| 208 | virtual void SwapBuffers( | ||
| 209 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; | ||
| 210 | |||
| 211 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | ||
| 212 | virtual void FlushRegion(VAddr addr, u64 size) = 0; | ||
| 213 | |||
| 214 | /// Notify rasterizer that any caches of the specified region should be invalidated | ||
| 215 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | ||
| 216 | |||
| 217 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||
| 218 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | ||
| 219 | |||
| 203 | private: | 220 | private: |
| 221 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 222 | void ProcessSemaphoreTriggerMethod(); | ||
| 223 | void ProcessSemaphoreRelease(); | ||
| 224 | void ProcessSemaphoreAcquire(); | ||
| 225 | |||
| 226 | /// Calls a GPU puller method. | ||
| 227 | void CallPullerMethod(const MethodCall& method_call); | ||
| 228 | |||
| 229 | /// Calls a GPU engine method. | ||
| 230 | void CallEngineMethod(const MethodCall& method_call); | ||
| 231 | |||
| 232 | /// Determines where the method should be executed. | ||
| 233 | bool ExecuteMethodOnEngine(const MethodCall& method_call); | ||
| 234 | |||
| 235 | protected: | ||
| 204 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 236 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 237 | VideoCore::RendererBase& renderer; | ||
| 238 | |||
| 239 | private: | ||
| 205 | std::unique_ptr<Tegra::MemoryManager> memory_manager; | 240 | std::unique_ptr<Tegra::MemoryManager> memory_manager; |
| 206 | 241 | ||
| 207 | /// Mapping of command subchannels to their bound engine ids. | 242 | /// Mapping of command subchannels to their bound engine ids. |
| @@ -217,18 +252,6 @@ private: | |||
| 217 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; | 252 | std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; |
| 218 | /// Inline memory engine | 253 | /// Inline memory engine |
| 219 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; | 254 | std::unique_ptr<Engines::KeplerMemory> kepler_memory; |
| 220 | |||
| 221 | void ProcessBindMethod(const MethodCall& method_call); | ||
| 222 | void ProcessSemaphoreTriggerMethod(); | ||
| 223 | void ProcessSemaphoreRelease(); | ||
| 224 | void ProcessSemaphoreAcquire(); | ||
| 225 | |||
| 226 | // Calls a GPU puller method. | ||
| 227 | void CallPullerMethod(const MethodCall& method_call); | ||
| 228 | // Calls a GPU engine method. | ||
| 229 | void CallEngineMethod(const MethodCall& method_call); | ||
| 230 | // Determines where the method should be executed. | ||
| 231 | bool ExecuteMethodOnEngine(const MethodCall& method_call); | ||
| 232 | }; | 255 | }; |
| 233 | 256 | ||
| 234 | #define ASSERT_REG_POSITION(field_name, position) \ | 257 | #define ASSERT_REG_POSITION(field_name, position) \ |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp new file mode 100644 index 000000000..ad0a747e3 --- /dev/null +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/gpu_asynch.h" | ||
| 6 | #include "video_core/gpu_thread.h" | ||
| 7 | #include "video_core/renderer_base.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) | ||
| 12 | : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {} | ||
| 13 | |||
| 14 | GPUAsynch::~GPUAsynch() = default; | ||
| 15 | |||
| 16 | void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 17 | gpu_thread.SubmitList(std::move(entries)); | ||
| 18 | } | ||
| 19 | |||
| 20 | void GPUAsynch::SwapBuffers( | ||
| 21 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||
| 22 | gpu_thread.SwapBuffers(std::move(framebuffer)); | ||
| 23 | } | ||
| 24 | |||
| 25 | void GPUAsynch::FlushRegion(VAddr addr, u64 size) { | ||
| 26 | gpu_thread.FlushRegion(addr, size); | ||
| 27 | } | ||
| 28 | |||
| 29 | void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { | ||
| 30 | gpu_thread.InvalidateRegion(addr, size); | ||
| 31 | } | ||
| 32 | |||
| 33 | void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 34 | gpu_thread.FlushAndInvalidateRegion(addr, size); | ||
| 35 | } | ||
| 36 | |||
| 37 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h new file mode 100644 index 000000000..e6a807aba --- /dev/null +++ b/src/video_core/gpu_asynch.h | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/gpu.h" | ||
| 8 | #include "video_core/gpu_thread.h" | ||
| 9 | |||
| 10 | namespace VideoCore { | ||
| 11 | class RendererBase; | ||
| 12 | } // namespace VideoCore | ||
| 13 | |||
| 14 | namespace VideoCommon { | ||
| 15 | |||
| 16 | namespace GPUThread { | ||
| 17 | class ThreadManager; | ||
| 18 | } // namespace GPUThread | ||
| 19 | |||
| 20 | /// Implementation of GPU interface that runs the GPU asynchronously | ||
| 21 | class GPUAsynch : public Tegra::GPU { | ||
| 22 | public: | ||
| 23 | explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); | ||
| 24 | ~GPUAsynch() override; | ||
| 25 | |||
| 26 | void PushGPUEntries(Tegra::CommandList&& entries) override; | ||
| 27 | void SwapBuffers( | ||
| 28 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 29 | void FlushRegion(VAddr addr, u64 size) override; | ||
| 30 | void InvalidateRegion(VAddr addr, u64 size) override; | ||
| 31 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||
| 32 | |||
| 33 | private: | ||
| 34 | GPUThread::ThreadManager gpu_thread; | ||
| 35 | }; | ||
| 36 | |||
| 37 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp new file mode 100644 index 000000000..4c00b96c7 --- /dev/null +++ b/src/video_core/gpu_synch.cpp | |||
| @@ -0,0 +1,37 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/gpu_synch.h" | ||
| 6 | #include "video_core/renderer_base.h" | ||
| 7 | |||
| 8 | namespace VideoCommon { | ||
| 9 | |||
| 10 | GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) | ||
| 11 | : Tegra::GPU(system, renderer) {} | ||
| 12 | |||
| 13 | GPUSynch::~GPUSynch() = default; | ||
| 14 | |||
| 15 | void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { | ||
| 16 | dma_pusher->Push(std::move(entries)); | ||
| 17 | dma_pusher->DispatchCalls(); | ||
| 18 | } | ||
| 19 | |||
| 20 | void GPUSynch::SwapBuffers( | ||
| 21 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||
| 22 | renderer.SwapBuffers(std::move(framebuffer)); | ||
| 23 | } | ||
| 24 | |||
| 25 | void GPUSynch::FlushRegion(VAddr addr, u64 size) { | ||
| 26 | renderer.Rasterizer().FlushRegion(addr, size); | ||
| 27 | } | ||
| 28 | |||
| 29 | void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { | ||
| 30 | renderer.Rasterizer().InvalidateRegion(addr, size); | ||
| 31 | } | ||
| 32 | |||
| 33 | void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 34 | renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); | ||
| 35 | } | ||
| 36 | |||
| 37 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h new file mode 100644 index 000000000..7d5a241ff --- /dev/null +++ b/src/video_core/gpu_synch.h | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/gpu.h" | ||
| 8 | |||
| 9 | namespace VideoCore { | ||
| 10 | class RendererBase; | ||
| 11 | } // namespace VideoCore | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | /// Implementation of GPU interface that runs the GPU synchronously | ||
| 16 | class GPUSynch : public Tegra::GPU { | ||
| 17 | public: | ||
| 18 | explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); | ||
| 19 | ~GPUSynch() override; | ||
| 20 | |||
| 21 | void PushGPUEntries(Tegra::CommandList&& entries) override; | ||
| 22 | void SwapBuffers( | ||
| 23 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||
| 24 | void FlushRegion(VAddr addr, u64 size) override; | ||
| 25 | void InvalidateRegion(VAddr addr, u64 size) override; | ||
| 26 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||
| 27 | }; | ||
| 28 | |||
| 29 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp new file mode 100644 index 000000000..c5bdd2a17 --- /dev/null +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -0,0 +1,152 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/microprofile.h" | ||
| 7 | #include "core/frontend/scope_acquire_window_context.h" | ||
| 8 | #include "core/settings.h" | ||
| 9 | #include "video_core/dma_pusher.h" | ||
| 10 | #include "video_core/gpu.h" | ||
| 11 | #include "video_core/gpu_thread.h" | ||
| 12 | #include "video_core/renderer_base.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::GPUThread { | ||
| 15 | |||
| 16 | /// Executes a single GPU thread command | ||
| 17 | static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, | ||
| 18 | Tegra::DmaPusher& dma_pusher) { | ||
| 19 | if (const auto submit_list = std::get_if<SubmitListCommand>(command)) { | ||
| 20 | dma_pusher.Push(std::move(submit_list->entries)); | ||
| 21 | dma_pusher.DispatchCalls(); | ||
| 22 | } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) { | ||
| 23 | renderer.SwapBuffers(data->framebuffer); | ||
| 24 | } else if (const auto data = std::get_if<FlushRegionCommand>(command)) { | ||
| 25 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | ||
| 26 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) { | ||
| 27 | renderer.Rasterizer().InvalidateRegion(data->addr, data->size); | ||
| 28 | } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) { | ||
| 29 | renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); | ||
| 30 | } else { | ||
| 31 | UNREACHABLE(); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | /// Runs the GPU thread | ||
| 36 | static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, | ||
| 37 | SynchState& state) { | ||
| 38 | |||
| 39 | MicroProfileOnThreadCreate("GpuThread"); | ||
| 40 | |||
| 41 | auto WaitForWakeup = [&]() { | ||
| 42 | std::unique_lock<std::mutex> lock{state.signal_mutex}; | ||
| 43 | state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; }); | ||
| 44 | }; | ||
| 45 | |||
| 46 | // Wait for first GPU command before acquiring the window context | ||
| 47 | WaitForWakeup(); | ||
| 48 | |||
| 49 | // If emulation was stopped during disk shader loading, abort before trying to acquire context | ||
| 50 | if (!state.is_running) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | |||
| 54 | Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; | ||
| 55 | |||
| 56 | while (state.is_running) { | ||
| 57 | if (!state.is_running) { | ||
| 58 | return; | ||
| 59 | } | ||
| 60 | |||
| 61 | { | ||
| 62 | // Thread has been woken up, so make the previous write queue the next read queue | ||
| 63 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | ||
| 64 | std::swap(state.push_queue, state.pop_queue); | ||
| 65 | } | ||
| 66 | |||
| 67 | // Execute all of the GPU commands | ||
| 68 | while (!state.pop_queue->empty()) { | ||
| 69 | ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); | ||
| 70 | state.pop_queue->pop(); | ||
| 71 | } | ||
| 72 | |||
| 73 | state.UpdateIdleState(); | ||
| 74 | |||
| 75 | // Signal that the GPU thread has finished processing commands | ||
| 76 | if (state.is_idle) { | ||
| 77 | state.idle_condition.notify_one(); | ||
| 78 | } | ||
| 79 | |||
| 80 | // Wait for CPU thread to send more GPU commands | ||
| 81 | WaitForWakeup(); | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) | ||
| 86 | : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), | ||
| 87 | std::ref(dma_pusher), std::ref(state)}, | ||
| 88 | thread_id{thread.get_id()} {} | ||
| 89 | |||
| 90 | ThreadManager::~ThreadManager() { | ||
| 91 | { | ||
| 92 | // Notify GPU thread that a shutdown is pending | ||
| 93 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | ||
| 94 | state.is_running = false; | ||
| 95 | } | ||
| 96 | |||
| 97 | state.signal_condition.notify_one(); | ||
| 98 | thread.join(); | ||
| 99 | } | ||
| 100 | |||
| 101 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | ||
| 102 | if (entries.empty()) { | ||
| 103 | return; | ||
| 104 | } | ||
| 105 | |||
| 106 | PushCommand(SubmitListCommand(std::move(entries)), false, false); | ||
| 107 | } | ||
| 108 | |||
| 109 | void ThreadManager::SwapBuffers( | ||
| 110 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||
| 111 | PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); | ||
| 112 | } | ||
| 113 | |||
| 114 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | ||
| 115 | // Block the CPU when using accurate emulation | ||
| 116 | PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false); | ||
| 117 | } | ||
| 118 | |||
| 119 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | ||
| 120 | PushCommand(InvalidateRegionCommand(addr, size), true, true); | ||
| 121 | } | ||
| 122 | |||
| 123 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||
| 124 | InvalidateRegion(addr, size); | ||
| 125 | } | ||
| 126 | |||
| 127 | void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { | ||
| 128 | { | ||
| 129 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | ||
| 130 | |||
| 131 | if ((allow_on_cpu && state.is_idle) || IsGpuThread()) { | ||
| 132 | // Execute the command synchronously on the current thread | ||
| 133 | ExecuteCommand(&command_data, renderer, dma_pusher); | ||
| 134 | return; | ||
| 135 | } | ||
| 136 | |||
| 137 | // Push the command to the GPU thread | ||
| 138 | state.UpdateIdleState(); | ||
| 139 | state.push_queue->emplace(command_data); | ||
| 140 | } | ||
| 141 | |||
| 142 | // Signal the GPU thread that commands are pending | ||
| 143 | state.signal_condition.notify_one(); | ||
| 144 | |||
| 145 | if (wait_for_idle) { | ||
| 146 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 147 | std::unique_lock<std::mutex> lock{state.idle_mutex}; | ||
| 148 | state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); }); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | |||
| 152 | } // namespace VideoCommon::GPUThread | ||
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h new file mode 100644 index 000000000..edb148b14 --- /dev/null +++ b/src/video_core/gpu_thread.h | |||
| @@ -0,0 +1,133 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <atomic> | ||
| 9 | #include <condition_variable> | ||
| 10 | #include <memory> | ||
| 11 | #include <mutex> | ||
| 12 | #include <optional> | ||
| 13 | #include <thread> | ||
| 14 | #include <variant> | ||
| 15 | |||
| 16 | namespace Tegra { | ||
| 17 | struct FramebufferConfig; | ||
| 18 | class DmaPusher; | ||
| 19 | } // namespace Tegra | ||
| 20 | |||
| 21 | namespace VideoCore { | ||
| 22 | class RendererBase; | ||
| 23 | } // namespace VideoCore | ||
| 24 | |||
| 25 | namespace VideoCommon::GPUThread { | ||
| 26 | |||
| 27 | /// Command to signal to the GPU thread that a command list is ready for processing | ||
| 28 | struct SubmitListCommand final { | ||
| 29 | explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} | ||
| 30 | |||
| 31 | Tegra::CommandList entries; | ||
| 32 | }; | ||
| 33 | |||
| 34 | /// Command to signal to the GPU thread that a swap buffers is pending | ||
| 35 | struct SwapBuffersCommand final { | ||
| 36 | explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) | ||
| 37 | : framebuffer{std::move(framebuffer)} {} | ||
| 38 | |||
| 39 | std::optional<const Tegra::FramebufferConfig> framebuffer; | ||
| 40 | }; | ||
| 41 | |||
| 42 | /// Command to signal to the GPU thread to flush a region | ||
| 43 | struct FlushRegionCommand final { | ||
| 44 | explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} | ||
| 45 | |||
| 46 | const VAddr addr; | ||
| 47 | const u64 size; | ||
| 48 | }; | ||
| 49 | |||
| 50 | /// Command to signal to the GPU thread to invalidate a region | ||
| 51 | struct InvalidateRegionCommand final { | ||
| 52 | explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} | ||
| 53 | |||
| 54 | const VAddr addr; | ||
| 55 | const u64 size; | ||
| 56 | }; | ||
| 57 | |||
| 58 | /// Command to signal to the GPU thread to flush and invalidate a region | ||
| 59 | struct FlushAndInvalidateRegionCommand final { | ||
| 60 | explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) | ||
| 61 | : addr{addr}, size{size} {} | ||
| 62 | |||
| 63 | const VAddr addr; | ||
| 64 | const u64 size; | ||
| 65 | }; | ||
| 66 | |||
| 67 | using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | ||
| 68 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | ||
| 69 | |||
| 70 | /// Struct used to synchronize the GPU thread | ||
| 71 | struct SynchState final { | ||
| 72 | std::atomic<bool> is_running{true}; | ||
| 73 | std::atomic<bool> is_idle{true}; | ||
| 74 | std::condition_variable signal_condition; | ||
| 75 | std::mutex signal_mutex; | ||
| 76 | std::condition_variable idle_condition; | ||
| 77 | std::mutex idle_mutex; | ||
| 78 | |||
| 79 | // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and | ||
| 80 | // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes | ||
| 81 | // empty. This allows for efficient thread-safe access, as it does not require any copies. | ||
| 82 | |||
| 83 | using CommandQueue = std::queue<CommandData>; | ||
| 84 | std::array<CommandQueue, 2> command_queues; | ||
| 85 | CommandQueue* push_queue{&command_queues[0]}; | ||
| 86 | CommandQueue* pop_queue{&command_queues[1]}; | ||
| 87 | |||
| 88 | void UpdateIdleState() { | ||
| 89 | std::lock_guard<std::mutex> lock{idle_mutex}; | ||
| 90 | is_idle = command_queues[0].empty() && command_queues[1].empty(); | ||
| 91 | } | ||
| 92 | }; | ||
| 93 | |||
| 94 | /// Class used to manage the GPU thread | ||
| 95 | class ThreadManager final { | ||
| 96 | public: | ||
| 97 | explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); | ||
| 98 | ~ThreadManager(); | ||
| 99 | |||
| 100 | /// Push GPU command entries to be processed | ||
| 101 | void SubmitList(Tegra::CommandList&& entries); | ||
| 102 | |||
| 103 | /// Swap buffers (render frame) | ||
| 104 | void SwapBuffers( | ||
| 105 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); | ||
| 106 | |||
| 107 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | ||
| 108 | void FlushRegion(VAddr addr, u64 size); | ||
| 109 | |||
| 110 | /// Notify rasterizer that any caches of the specified region should be invalidated | ||
| 111 | void InvalidateRegion(VAddr addr, u64 size); | ||
| 112 | |||
| 113 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||
| 114 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||
| 115 | |||
| 116 | private: | ||
| 117 | /// Pushes a command to be executed by the GPU thread | ||
| 118 | void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); | ||
| 119 | |||
| 120 | /// Returns true if this is called by the GPU thread | ||
| 121 | bool IsGpuThread() const { | ||
| 122 | return std::this_thread::get_id() == thread_id; | ||
| 123 | } | ||
| 124 | |||
| 125 | private: | ||
| 126 | SynchState state; | ||
| 127 | VideoCore::RendererBase& renderer; | ||
| 128 | Tegra::DmaPusher& dma_pusher; | ||
| 129 | std::thread thread; | ||
| 130 | std::thread::id thread_id; | ||
| 131 | }; | ||
| 132 | |||
| 133 | } // namespace VideoCommon::GPUThread | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 321d9dd3d..301562ff6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst | |||
| 118 | 118 | ||
| 119 | glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); | 119 | glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); |
| 120 | 120 | ||
| 121 | LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); | 121 | LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); |
| 122 | CheckExtensions(); | 122 | CheckExtensions(); |
| 123 | } | 123 | } |
| 124 | 124 | ||
| @@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { | |||
| 177 | continue; | 177 | continue; |
| 178 | 178 | ||
| 179 | const auto& buffer = regs.vertex_array[attrib.buffer]; | 179 | const auto& buffer = regs.vertex_array[attrib.buffer]; |
| 180 | LOG_TRACE(HW_GPU, | 180 | LOG_TRACE(Render_OpenGL, |
| 181 | "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", | 181 | "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", |
| 182 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), | 182 | index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), |
| 183 | attrib.offset.Value(), attrib.IsNormalized()); | 183 | attrib.offset.Value(), attrib.IsNormalized()); |
| @@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 343 | shader_program_manager->UseProgrammableFragmentShader(program_handle); | 343 | shader_program_manager->UseProgrammableFragmentShader(program_handle); |
| 344 | break; | 344 | break; |
| 345 | default: | 345 | default: |
| 346 | LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | 346 | UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, |
| 347 | shader_config.enable.Value(), shader_config.offset); | 347 | shader_config.enable.Value(), shader_config.offset); |
| 348 | UNREACHABLE(); | ||
| 349 | } | 348 | } |
| 350 | 349 | ||
| 351 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); | 350 | const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); |
| @@ -749,11 +748,7 @@ void RasterizerOpenGL::FlushAll() {} | |||
| 749 | 748 | ||
| 750 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | 749 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { |
| 751 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 750 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 752 | 751 | res_cache.FlushRegion(addr, size); | |
| 753 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 754 | // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit | ||
| 755 | res_cache.FlushRegion(addr, size); | ||
| 756 | } | ||
| 757 | } | 752 | } |
| 758 | 753 | ||
| 759 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 754 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -797,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 797 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; | 792 | VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; |
| 798 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); | 793 | ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); |
| 799 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); | 794 | ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); |
| 800 | ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); | 795 | |
| 796 | if (params.pixel_format != pixel_format) { | ||
| 797 | LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); | ||
| 798 | } | ||
| 801 | 799 | ||
| 802 | screen_info.display_texture = surface->Texture().handle; | 800 | screen_info.display_texture = surface->Texture().handle; |
| 803 | 801 | ||
| @@ -943,8 +941,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader | |||
| 943 | size = buffer.size; | 941 | size = buffer.size; |
| 944 | 942 | ||
| 945 | if (size > MaxConstbufferSize) { | 943 | if (size > MaxConstbufferSize) { |
| 946 | LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, | 944 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, |
| 947 | MaxConstbufferSize); | 945 | MaxConstbufferSize); |
| 948 | size = MaxConstbufferSize; | 946 | size = MaxConstbufferSize; |
| 949 | } | 947 | } |
| 950 | } else { | 948 | } else { |
| @@ -1004,10 +1002,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s | |||
| 1004 | 1002 | ||
| 1005 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); | 1003 | texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); |
| 1006 | 1004 | ||
| 1007 | Surface surface = res_cache.GetTextureSurface(texture, entry); | 1005 | if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { |
| 1008 | if (surface != nullptr) { | ||
| 1009 | state.texture_units[current_bindpoint].texture = | 1006 | state.texture_units[current_bindpoint].texture = |
| 1010 | entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; | 1007 | surface->Texture(entry.IsArray()).handle; |
| 1011 | surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, | 1008 | surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, |
| 1012 | texture.tic.w_source); | 1009 | texture.tic.w_source); |
| 1013 | } else { | 1010 | } else { |
| @@ -1239,11 +1236,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { | |||
| 1239 | 1236 | ||
| 1240 | void RasterizerOpenGL::SyncTransformFeedback() { | 1237 | void RasterizerOpenGL::SyncTransformFeedback() { |
| 1241 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 1238 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 1242 | 1239 | UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); | |
| 1243 | if (regs.tfb_enabled != 0) { | ||
| 1244 | LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented"); | ||
| 1245 | UNREACHABLE(); | ||
| 1246 | } | ||
| 1247 | } | 1240 | } |
| 1248 | 1241 | ||
| 1249 | void RasterizerOpenGL::SyncPointState() { | 1242 | void RasterizerOpenGL::SyncPointState() { |
| @@ -1263,12 +1256,8 @@ void RasterizerOpenGL::SyncPolygonOffset() { | |||
| 1263 | 1256 | ||
| 1264 | void RasterizerOpenGL::CheckAlphaTests() { | 1257 | void RasterizerOpenGL::CheckAlphaTests() { |
| 1265 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; | 1258 | const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; |
| 1266 | 1259 | UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, | |
| 1267 | if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { | 1260 | "Alpha Testing is enabled with more than one rendertarget"); |
| 1268 | LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, " | ||
| 1269 | "this behavior is undefined."); | ||
| 1270 | UNREACHABLE(); | ||
| 1271 | } | ||
| 1272 | } | 1261 | } |
| 1273 | 1262 | ||
| 1274 | } // namespace OpenGL | 1263 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index b5a9722f9..e9eb6e921 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -21,7 +21,7 @@ | |||
| 21 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | 21 | #include "video_core/renderer_opengl/gl_rasterizer_cache.h" |
| 22 | #include "video_core/renderer_opengl/utils.h" | 22 | #include "video_core/renderer_opengl/utils.h" |
| 23 | #include "video_core/surface.h" | 23 | #include "video_core/surface.h" |
| 24 | #include "video_core/textures/astc.h" | 24 | #include "video_core/textures/convert.h" |
| 25 | #include "video_core/textures/decoders.h" | 25 | #include "video_core/textures/decoders.h" |
| 26 | 26 | ||
| 27 | namespace OpenGL { | 27 | namespace OpenGL { |
| @@ -400,6 +400,27 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType | |||
| 400 | return format; | 400 | return format; |
| 401 | } | 401 | } |
| 402 | 402 | ||
| 403 | /// Returns the discrepant array target | ||
| 404 | constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) { | ||
| 405 | switch (target) { | ||
| 406 | case SurfaceTarget::Texture1D: | ||
| 407 | return GL_TEXTURE_1D_ARRAY; | ||
| 408 | case SurfaceTarget::Texture2D: | ||
| 409 | return GL_TEXTURE_2D_ARRAY; | ||
| 410 | case SurfaceTarget::Texture3D: | ||
| 411 | return GL_NONE; | ||
| 412 | case SurfaceTarget::Texture1DArray: | ||
| 413 | return GL_TEXTURE_1D; | ||
| 414 | case SurfaceTarget::Texture2DArray: | ||
| 415 | return GL_TEXTURE_2D; | ||
| 416 | case SurfaceTarget::TextureCubemap: | ||
| 417 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 418 | case SurfaceTarget::TextureCubeArray: | ||
| 419 | return GL_TEXTURE_CUBE_MAP; | ||
| 420 | } | ||
| 421 | return GL_NONE; | ||
| 422 | } | ||
| 423 | |||
| 403 | Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { | 424 | Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { |
| 404 | u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; | 425 | u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; |
| 405 | if (IsPixelFormatASTC(pixel_format)) { | 426 | if (IsPixelFormatASTC(pixel_format)) { |
| @@ -597,103 +618,6 @@ CachedSurface::CachedSurface(const SurfaceParams& params) | |||
| 597 | } | 618 | } |
| 598 | } | 619 | } |
| 599 | 620 | ||
| 600 | static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) { | ||
| 601 | union S8Z24 { | ||
| 602 | BitField<0, 24, u32> z24; | ||
| 603 | BitField<24, 8, u32> s8; | ||
| 604 | }; | ||
| 605 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 606 | |||
| 607 | union Z24S8 { | ||
| 608 | BitField<0, 8, u32> s8; | ||
| 609 | BitField<8, 24, u32> z24; | ||
| 610 | }; | ||
| 611 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 612 | |||
| 613 | S8Z24 s8z24_pixel{}; | ||
| 614 | Z24S8 z24s8_pixel{}; | ||
| 615 | constexpr auto bpp{GetBytesPerPixel(PixelFormat::S8Z24)}; | ||
| 616 | for (std::size_t y = 0; y < height; ++y) { | ||
| 617 | for (std::size_t x = 0; x < width; ++x) { | ||
| 618 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 619 | if (reverse) { | ||
| 620 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 621 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 622 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 623 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 624 | } else { | ||
| 625 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 626 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 627 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 628 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 629 | } | ||
| 630 | } | ||
| 631 | } | ||
| 632 | } | ||
| 633 | |||
| 634 | /** | ||
| 635 | * Helper function to perform software conversion (as needed) when loading a buffer from Switch | ||
| 636 | * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with | ||
| 637 | * typical desktop GPUs. | ||
| 638 | */ | ||
| 639 | static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | ||
| 640 | u32 width, u32 height, u32 depth) { | ||
| 641 | switch (pixel_format) { | ||
| 642 | case PixelFormat::ASTC_2D_4X4: | ||
| 643 | case PixelFormat::ASTC_2D_8X8: | ||
| 644 | case PixelFormat::ASTC_2D_8X5: | ||
| 645 | case PixelFormat::ASTC_2D_5X4: | ||
| 646 | case PixelFormat::ASTC_2D_5X5: | ||
| 647 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 648 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 649 | case PixelFormat::ASTC_2D_8X5_SRGB: | ||
| 650 | case PixelFormat::ASTC_2D_5X4_SRGB: | ||
| 651 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 652 | case PixelFormat::ASTC_2D_10X8: | ||
| 653 | case PixelFormat::ASTC_2D_10X8_SRGB: { | ||
| 654 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 655 | u32 block_width{}; | ||
| 656 | u32 block_height{}; | ||
| 657 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 658 | data = | ||
| 659 | Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); | ||
| 660 | break; | ||
| 661 | } | ||
| 662 | case PixelFormat::S8Z24: | ||
| 663 | // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. | ||
| 664 | ConvertS8Z24ToZ24S8(data, width, height, false); | ||
| 665 | break; | ||
| 666 | } | ||
| 667 | } | ||
| 668 | |||
| 669 | /** | ||
| 670 | * Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to | ||
| 671 | * Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or | ||
| 672 | * with typical desktop GPUs. | ||
| 673 | */ | ||
| 674 | static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, | ||
| 675 | u32 width, u32 height) { | ||
| 676 | switch (pixel_format) { | ||
| 677 | case PixelFormat::ASTC_2D_4X4: | ||
| 678 | case PixelFormat::ASTC_2D_8X8: | ||
| 679 | case PixelFormat::ASTC_2D_4X4_SRGB: | ||
| 680 | case PixelFormat::ASTC_2D_8X8_SRGB: | ||
| 681 | case PixelFormat::ASTC_2D_5X5: | ||
| 682 | case PixelFormat::ASTC_2D_5X5_SRGB: | ||
| 683 | case PixelFormat::ASTC_2D_10X8: | ||
| 684 | case PixelFormat::ASTC_2D_10X8_SRGB: { | ||
| 685 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 686 | static_cast<u32>(pixel_format)); | ||
| 687 | UNREACHABLE(); | ||
| 688 | break; | ||
| 689 | } | ||
| 690 | case PixelFormat::S8Z24: | ||
| 691 | // Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24. | ||
| 692 | ConvertS8Z24ToZ24S8(data, width, height, true); | ||
| 693 | break; | ||
| 694 | } | ||
| 695 | } | ||
| 696 | |||
| 697 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); | 621 | MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); |
| 698 | void CachedSurface::LoadGLBuffer() { | 622 | void CachedSurface::LoadGLBuffer() { |
| 699 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); | 623 | MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); |
| @@ -722,8 +646,16 @@ void CachedSurface::LoadGLBuffer() { | |||
| 722 | } | 646 | } |
| 723 | } | 647 | } |
| 724 | for (u32 i = 0; i < params.max_mip_level; i++) { | 648 | for (u32 i = 0; i < params.max_mip_level; i++) { |
| 725 | ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer[i], params.pixel_format, params.MipWidth(i), | 649 | const u32 width = params.MipWidth(i); |
| 726 | params.MipHeight(i), params.MipDepth(i)); | 650 | const u32 height = params.MipHeight(i); |
| 651 | const u32 depth = params.MipDepth(i); | ||
| 652 | if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) { | ||
| 653 | // Reserve size for RGBA8 conversion | ||
| 654 | constexpr std::size_t rgba_bpp = 4; | ||
| 655 | gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp)); | ||
| 656 | } | ||
| 657 | Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width, | ||
| 658 | height, depth, true, true); | ||
| 727 | } | 659 | } |
| 728 | } | 660 | } |
| 729 | 661 | ||
| @@ -746,8 +678,8 @@ void CachedSurface::FlushGLBuffer() { | |||
| 746 | glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, | 678 | glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, |
| 747 | static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); | 679 | static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); |
| 748 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | 680 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); |
| 749 | ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, | 681 | Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, |
| 750 | params.height); | 682 | params.height, params.depth, true, true); |
| 751 | const u8* const texture_src_data = Memory::GetPointer(params.addr); | 683 | const u8* const texture_src_data = Memory::GetPointer(params.addr); |
| 752 | ASSERT(texture_src_data); | 684 | ASSERT(texture_src_data); |
| 753 | if (params.is_tiled) { | 685 | if (params.is_tiled) { |
| @@ -884,20 +816,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, | |||
| 884 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); | 816 | glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); |
| 885 | } | 817 | } |
| 886 | 818 | ||
| 887 | void CachedSurface::EnsureTextureView() { | 819 | void CachedSurface::EnsureTextureDiscrepantView() { |
| 888 | if (texture_view.handle != 0) | 820 | if (discrepant_view.handle != 0) |
| 889 | return; | 821 | return; |
| 890 | 822 | ||
| 891 | const GLenum target{TargetLayer()}; | 823 | const GLenum target{GetArrayDiscrepantTarget(params.target)}; |
| 824 | ASSERT(target != GL_NONE); | ||
| 825 | |||
| 892 | const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; | 826 | const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; |
| 893 | constexpr GLuint min_layer = 0; | 827 | constexpr GLuint min_layer = 0; |
| 894 | constexpr GLuint min_level = 0; | 828 | constexpr GLuint min_level = 0; |
| 895 | 829 | ||
| 896 | glGenTextures(1, &texture_view.handle); | 830 | glGenTextures(1, &discrepant_view.handle); |
| 897 | glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, | 831 | glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level, |
| 898 | params.max_mip_level, min_layer, num_layers); | 832 | params.max_mip_level, min_layer, num_layers); |
| 899 | ApplyTextureDefaults(texture_view.handle, params.max_mip_level); | 833 | ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level); |
| 900 | glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, | 834 | glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, |
| 901 | reinterpret_cast<const GLint*>(swizzle.data())); | 835 | reinterpret_cast<const GLint*>(swizzle.data())); |
| 902 | } | 836 | } |
| 903 | 837 | ||
| @@ -923,8 +857,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, | |||
| 923 | swizzle = {new_x, new_y, new_z, new_w}; | 857 | swizzle = {new_x, new_y, new_z, new_w}; |
| 924 | const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); | 858 | const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); |
| 925 | glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); | 859 | glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); |
| 926 | if (texture_view.handle != 0) { | 860 | if (discrepant_view.handle != 0) { |
| 927 | glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); | 861 | glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); |
| 928 | } | 862 | } |
| 929 | } | 863 | } |
| 930 | 864 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 797bbdc9c..9cf6f50be 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -367,31 +367,19 @@ public: | |||
| 367 | return texture; | 367 | return texture; |
| 368 | } | 368 | } |
| 369 | 369 | ||
| 370 | const OGLTexture& TextureLayer() { | 370 | const OGLTexture& Texture(bool as_array) { |
| 371 | if (params.is_array) { | 371 | if (params.is_array == as_array) { |
| 372 | return Texture(); | 372 | return texture; |
| 373 | } else { | ||
| 374 | EnsureTextureDiscrepantView(); | ||
| 375 | return discrepant_view; | ||
| 373 | } | 376 | } |
| 374 | EnsureTextureView(); | ||
| 375 | return texture_view; | ||
| 376 | } | 377 | } |
| 377 | 378 | ||
| 378 | GLenum Target() const { | 379 | GLenum Target() const { |
| 379 | return gl_target; | 380 | return gl_target; |
| 380 | } | 381 | } |
| 381 | 382 | ||
| 382 | GLenum TargetLayer() const { | ||
| 383 | using VideoCore::Surface::SurfaceTarget; | ||
| 384 | switch (params.target) { | ||
| 385 | case SurfaceTarget::Texture1D: | ||
| 386 | return GL_TEXTURE_1D_ARRAY; | ||
| 387 | case SurfaceTarget::Texture2D: | ||
| 388 | return GL_TEXTURE_2D_ARRAY; | ||
| 389 | case SurfaceTarget::TextureCubemap: | ||
| 390 | return GL_TEXTURE_CUBE_MAP_ARRAY; | ||
| 391 | } | ||
| 392 | return Target(); | ||
| 393 | } | ||
| 394 | |||
| 395 | const SurfaceParams& GetSurfaceParams() const { | 383 | const SurfaceParams& GetSurfaceParams() const { |
| 396 | return params; | 384 | return params; |
| 397 | } | 385 | } |
| @@ -431,10 +419,10 @@ public: | |||
| 431 | private: | 419 | private: |
| 432 | void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); | 420 | void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); |
| 433 | 421 | ||
| 434 | void EnsureTextureView(); | 422 | void EnsureTextureDiscrepantView(); |
| 435 | 423 | ||
| 436 | OGLTexture texture; | 424 | OGLTexture texture; |
| 437 | OGLTexture texture_view; | 425 | OGLTexture discrepant_view; |
| 438 | std::vector<std::vector<u8>> gl_buffer; | 426 | std::vector<std::vector<u8>> gl_buffer; |
| 439 | SurfaceParams params{}; | 427 | SurfaceParams params{}; |
| 440 | GLenum gl_target{}; | 428 | GLenum gl_target{}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 72ff6ac6a..11d1169f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -5,7 +5,9 @@ | |||
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <string> | 6 | #include <string> |
| 7 | #include <string_view> | 7 | #include <string_view> |
| 8 | #include <utility> | ||
| 8 | #include <variant> | 9 | #include <variant> |
| 10 | #include <vector> | ||
| 9 | 11 | ||
| 10 | #include <fmt/format.h> | 12 | #include <fmt/format.h> |
| 11 | 13 | ||
| @@ -717,7 +719,7 @@ private: | |||
| 717 | } | 719 | } |
| 718 | 720 | ||
| 719 | std::string GenerateTexture(Operation operation, const std::string& func, | 721 | std::string GenerateTexture(Operation operation, const std::string& func, |
| 720 | bool is_extra_int = false) { | 722 | const std::vector<std::pair<Type, Node>>& extras) { |
| 721 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | 723 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; |
| 722 | 724 | ||
| 723 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 725 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| @@ -738,36 +740,47 @@ private: | |||
| 738 | expr += Visit(operation[i]); | 740 | expr += Visit(operation[i]); |
| 739 | 741 | ||
| 740 | const std::size_t next = i + 1; | 742 | const std::size_t next = i + 1; |
| 741 | if (next < count || has_array || has_shadow) | 743 | if (next < count) |
| 742 | expr += ", "; | 744 | expr += ", "; |
| 743 | } | 745 | } |
| 744 | if (has_array) { | 746 | if (has_array) { |
| 745 | expr += "float(ftoi(" + Visit(meta->array) + "))"; | 747 | expr += ", float(ftoi(" + Visit(meta->array) + "))"; |
| 746 | } | 748 | } |
| 747 | if (has_shadow) { | 749 | if (has_shadow) { |
| 748 | if (has_array) | 750 | expr += ", " + Visit(meta->depth_compare); |
| 749 | expr += ", "; | ||
| 750 | expr += Visit(meta->depth_compare); | ||
| 751 | } | 751 | } |
| 752 | expr += ')'; | 752 | expr += ')'; |
| 753 | 753 | ||
| 754 | for (const Node extra : meta->extras) { | 754 | for (const auto& extra_pair : extras) { |
| 755 | const auto [type, operand] = extra_pair; | ||
| 756 | if (operand == nullptr) { | ||
| 757 | continue; | ||
| 758 | } | ||
| 755 | expr += ", "; | 759 | expr += ", "; |
| 756 | if (is_extra_int) { | 760 | |
| 757 | if (const auto immediate = std::get_if<ImmediateNode>(extra)) { | 761 | switch (type) { |
| 762 | case Type::Int: | ||
| 763 | if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | ||
| 758 | // Inline the string as an immediate integer in GLSL (some extra arguments are | 764 | // Inline the string as an immediate integer in GLSL (some extra arguments are |
| 759 | // required to be constant) | 765 | // required to be constant) |
| 760 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | 766 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); |
| 761 | } else { | 767 | } else { |
| 762 | expr += "ftoi(" + Visit(extra) + ')'; | 768 | expr += "ftoi(" + Visit(operand) + ')'; |
| 763 | } | 769 | } |
| 764 | } else { | 770 | break; |
| 765 | expr += Visit(extra); | 771 | case Type::Float: |
| 772 | expr += Visit(operand); | ||
| 773 | break; | ||
| 774 | default: { | ||
| 775 | const auto type_int = static_cast<u32>(type); | ||
| 776 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||
| 777 | expr += '0'; | ||
| 778 | break; | ||
| 779 | } | ||
| 766 | } | 780 | } |
| 767 | } | 781 | } |
| 768 | 782 | ||
| 769 | expr += ')'; | 783 | return expr + ')'; |
| 770 | return expr; | ||
| 771 | } | 784 | } |
| 772 | 785 | ||
| 773 | std::string Assign(Operation operation) { | 786 | std::string Assign(Operation operation) { |
| @@ -1146,7 +1159,7 @@ private: | |||
| 1146 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1159 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1147 | ASSERT(meta); | 1160 | ASSERT(meta); |
| 1148 | 1161 | ||
| 1149 | std::string expr = GenerateTexture(operation, "texture"); | 1162 | std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); |
| 1150 | if (meta->sampler.IsShadow()) { | 1163 | if (meta->sampler.IsShadow()) { |
| 1151 | expr = "vec4(" + expr + ')'; | 1164 | expr = "vec4(" + expr + ')'; |
| 1152 | } | 1165 | } |
| @@ -1157,7 +1170,7 @@ private: | |||
| 1157 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1170 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1158 | ASSERT(meta); | 1171 | ASSERT(meta); |
| 1159 | 1172 | ||
| 1160 | std::string expr = GenerateTexture(operation, "textureLod"); | 1173 | std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); |
| 1161 | if (meta->sampler.IsShadow()) { | 1174 | if (meta->sampler.IsShadow()) { |
| 1162 | expr = "vec4(" + expr + ')'; | 1175 | expr = "vec4(" + expr + ')'; |
| 1163 | } | 1176 | } |
| @@ -1168,7 +1181,8 @@ private: | |||
| 1168 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1181 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1169 | ASSERT(meta); | 1182 | ASSERT(meta); |
| 1170 | 1183 | ||
| 1171 | return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + | 1184 | const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; |
| 1185 | return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + | ||
| 1172 | GetSwizzle(meta->element); | 1186 | GetSwizzle(meta->element); |
| 1173 | } | 1187 | } |
| 1174 | 1188 | ||
| @@ -1197,8 +1211,8 @@ private: | |||
| 1197 | ASSERT(meta); | 1211 | ASSERT(meta); |
| 1198 | 1212 | ||
| 1199 | if (meta->element < 2) { | 1213 | if (meta->element < 2) { |
| 1200 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + | 1214 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + |
| 1201 | GetSwizzle(meta->element) + "))"; | 1215 | " * vec2(256))" + GetSwizzle(meta->element) + "))"; |
| 1202 | } | 1216 | } |
| 1203 | return "0"; | 1217 | return "0"; |
| 1204 | } | 1218 | } |
| @@ -1224,9 +1238,9 @@ private: | |||
| 1224 | else if (next < count) | 1238 | else if (next < count) |
| 1225 | expr += ", "; | 1239 | expr += ", "; |
| 1226 | } | 1240 | } |
| 1227 | for (std::size_t i = 0; i < meta->extras.size(); ++i) { | 1241 | if (meta->lod) { |
| 1228 | expr += ", "; | 1242 | expr += ", "; |
| 1229 | expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); | 1243 | expr += CastOperand(Visit(meta->lod), Type::Int); |
| 1230 | } | 1244 | } |
| 1231 | expr += ')'; | 1245 | expr += ')'; |
| 1232 | 1246 | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index e60b2eb44..8b510b6ae 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -244,6 +244,21 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 244 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); | 244 | LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); |
| 245 | } | 245 | } |
| 246 | 246 | ||
| 247 | void RendererOpenGL::AddTelemetryFields() { | ||
| 248 | const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; | ||
| 249 | const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; | ||
| 250 | const char* const gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; | ||
| 251 | |||
| 252 | LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); | ||
| 253 | LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); | ||
| 254 | LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); | ||
| 255 | |||
| 256 | auto& telemetry_session = system.TelemetrySession(); | ||
| 257 | telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); | ||
| 258 | telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); | ||
| 259 | telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); | ||
| 260 | } | ||
| 261 | |||
| 247 | void RendererOpenGL::CreateRasterizer() { | 262 | void RendererOpenGL::CreateRasterizer() { |
| 248 | if (rasterizer) { | 263 | if (rasterizer) { |
| 249 | return; | 264 | return; |
| @@ -466,17 +481,7 @@ bool RendererOpenGL::Init() { | |||
| 466 | glDebugMessageCallback(DebugHandler, nullptr); | 481 | glDebugMessageCallback(DebugHandler, nullptr); |
| 467 | } | 482 | } |
| 468 | 483 | ||
| 469 | const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; | 484 | AddTelemetryFields(); |
| 470 | const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; | ||
| 471 | const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; | ||
| 472 | |||
| 473 | LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); | ||
| 474 | LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); | ||
| 475 | LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); | ||
| 476 | |||
| 477 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); | ||
| 478 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); | ||
| 479 | Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); | ||
| 480 | 485 | ||
| 481 | if (!GLAD_GL_VERSION_4_3) { | 486 | if (!GLAD_GL_VERSION_4_3) { |
| 482 | return false; | 487 | return false; |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index c168fa89e..6cbf9d2cb 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -60,6 +60,7 @@ public: | |||
| 60 | 60 | ||
| 61 | private: | 61 | private: |
| 62 | void InitOpenGLObjects(); | 62 | void InitOpenGLObjects(); |
| 63 | void AddTelemetryFields(); | ||
| 63 | void CreateRasterizer(); | 64 | void CreateRasterizer(); |
| 64 | 65 | ||
| 65 | void ConfigureFramebufferTexture(TextureInfo& texture, | 66 | void ConfigureFramebufferTexture(TextureInfo& texture, |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp new file mode 100644 index 000000000..34bf26ff2 --- /dev/null +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -0,0 +1,483 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 10 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | #include "video_core/surface.h" | ||
| 13 | |||
| 14 | namespace Vulkan::MaxwellToVK { | ||
| 15 | |||
| 16 | namespace Sampler { | ||
| 17 | |||
| 18 | vk::Filter Filter(Tegra::Texture::TextureFilter filter) { | ||
| 19 | switch (filter) { | ||
| 20 | case Tegra::Texture::TextureFilter::Linear: | ||
| 21 | return vk::Filter::eLinear; | ||
| 22 | case Tegra::Texture::TextureFilter::Nearest: | ||
| 23 | return vk::Filter::eNearest; | ||
| 24 | } | ||
| 25 | UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); | ||
| 26 | return {}; | ||
| 27 | } | ||
| 28 | |||
| 29 | vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { | ||
| 30 | switch (mipmap_filter) { | ||
| 31 | case Tegra::Texture::TextureMipmapFilter::None: | ||
| 32 | // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping | ||
| 33 | // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to | ||
| 34 | // use an image view with a single mipmap level to emulate this. | ||
| 35 | return vk::SamplerMipmapMode::eLinear; | ||
| 36 | case Tegra::Texture::TextureMipmapFilter::Linear: | ||
| 37 | return vk::SamplerMipmapMode::eLinear; | ||
| 38 | case Tegra::Texture::TextureMipmapFilter::Nearest: | ||
| 39 | return vk::SamplerMipmapMode::eNearest; | ||
| 40 | } | ||
| 41 | UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); | ||
| 42 | return {}; | ||
| 43 | } | ||
| 44 | |||
| 45 | vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { | ||
| 46 | switch (wrap_mode) { | ||
| 47 | case Tegra::Texture::WrapMode::Wrap: | ||
| 48 | return vk::SamplerAddressMode::eRepeat; | ||
| 49 | case Tegra::Texture::WrapMode::Mirror: | ||
| 50 | return vk::SamplerAddressMode::eMirroredRepeat; | ||
| 51 | case Tegra::Texture::WrapMode::ClampToEdge: | ||
| 52 | return vk::SamplerAddressMode::eClampToEdge; | ||
| 53 | case Tegra::Texture::WrapMode::Border: | ||
| 54 | return vk::SamplerAddressMode::eClampToBorder; | ||
| 55 | case Tegra::Texture::WrapMode::ClampOGL: | ||
| 56 | // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use | ||
| 57 | // eClampToBorder to get the border color of the texture, and then sample the edge to | ||
| 58 | // manually mix them. However the shader part of this is not yet implemented. | ||
| 59 | return vk::SamplerAddressMode::eClampToBorder; | ||
| 60 | case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: | ||
| 61 | return vk::SamplerAddressMode::eMirrorClampToEdge; | ||
| 62 | case Tegra::Texture::WrapMode::MirrorOnceBorder: | ||
| 63 | UNIMPLEMENTED(); | ||
| 64 | return vk::SamplerAddressMode::eMirrorClampToEdge; | ||
| 65 | } | ||
| 66 | UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); | ||
| 67 | return {}; | ||
| 68 | } | ||
| 69 | |||
| 70 | vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { | ||
| 71 | switch (depth_compare_func) { | ||
| 72 | case Tegra::Texture::DepthCompareFunc::Never: | ||
| 73 | return vk::CompareOp::eNever; | ||
| 74 | case Tegra::Texture::DepthCompareFunc::Less: | ||
| 75 | return vk::CompareOp::eLess; | ||
| 76 | case Tegra::Texture::DepthCompareFunc::LessEqual: | ||
| 77 | return vk::CompareOp::eLessOrEqual; | ||
| 78 | case Tegra::Texture::DepthCompareFunc::Equal: | ||
| 79 | return vk::CompareOp::eEqual; | ||
| 80 | case Tegra::Texture::DepthCompareFunc::NotEqual: | ||
| 81 | return vk::CompareOp::eNotEqual; | ||
| 82 | case Tegra::Texture::DepthCompareFunc::Greater: | ||
| 83 | return vk::CompareOp::eGreater; | ||
| 84 | case Tegra::Texture::DepthCompareFunc::GreaterEqual: | ||
| 85 | return vk::CompareOp::eGreaterOrEqual; | ||
| 86 | case Tegra::Texture::DepthCompareFunc::Always: | ||
| 87 | return vk::CompareOp::eAlways; | ||
| 88 | } | ||
| 89 | UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", | ||
| 90 | static_cast<u32>(depth_compare_func)); | ||
| 91 | return {}; | ||
| 92 | } | ||
| 93 | |||
| 94 | } // namespace Sampler | ||
| 95 | |||
/// Describes how a guest pixel format maps onto a host Vulkan format.
struct FormatTuple {
    vk::Format format; ///< Vulkan format
    ComponentType component_type; ///< Abstracted component type
    bool attachable; ///< True when this format can be used as an attachment
};
| 101 | |||
// Lookup table indexed by VideoCore::Surface::PixelFormat (the trailing comment on each row names
// the guest format for that index, so the row order must match the enum order exactly).
// Rows with vk::Format::eUndefined are guest formats with no Vulkan translation yet; SurfaceFormat
// reports them through UNIMPLEMENTED_IF_MSG when they are requested.
static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
    {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ABGR8S
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ABGR8UI
    {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false},  // B5G6R5U
    {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // A1B5G5R5U
    {vk::Format::eR8Unorm, ComponentType::UNorm, true},             // R8U
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R8UI
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RGBA16F
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RGBA16U
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RGBA16UI
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R11FG11FB10F
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RGBA32UI
    {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false},  // DXT1
    {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false},      // DXT23
    {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false},      // DXT45
    {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false},      // DXN1
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // DXN2UNORM
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // DXN2SNORM
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // BC7U
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // BC6H_UF16
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // BC6H_SF16
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_4X4
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // BGRA8
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RGBA32F
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG32F
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R32F
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R16F
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R16U
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R16S
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R16UI
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R16I
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG16
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG16F
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG16UI
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG16I
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG16S
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RGB32F
    {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true},  // RGBA8_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG8U
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG8S
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // RG32UI
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // R32UI
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_8X8
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_8X5
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_5X4

    // Compressed sRGB formats
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // BGRA8_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // DXT1_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // DXT23_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // DXT45_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // BC7U_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_4X4_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_8X8_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_8X5_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_5X4_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_5X5
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_5X5_SRGB
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_10X8
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // ASTC_2D_10X8_SRGB

    // Depth formats
    {vk::Format::eD32Sfloat, ComponentType::Float, true},           // Z32F
    {vk::Format::eD16Unorm, ComponentType::UNorm, true},            // Z16

    // DepthStencil formats
    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true},      // Z24S8
    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true},      // S8Z24 (emulated)
    {vk::Format::eUndefined, ComponentType::Invalid, false},        // Z32FS8
}};
| 174 | |||
| 175 | static constexpr bool IsZetaFormat(PixelFormat pixel_format) { | ||
| 176 | return pixel_format >= PixelFormat::MaxColorFormat && | ||
| 177 | pixel_format < PixelFormat::MaxDepthStencilFormat; | ||
| 178 | } | ||
| 179 | |||
| 180 | std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, | ||
| 181 | PixelFormat pixel_format, ComponentType component_type) { | ||
| 182 | ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); | ||
| 183 | |||
| 184 | const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)]; | ||
| 185 | UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined, | ||
| 186 | "Unimplemented texture format with pixel format={} and component type={}", | ||
| 187 | static_cast<u32>(pixel_format), static_cast<u32>(component_type)); | ||
| 188 | ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch"); | ||
| 189 | |||
| 190 | auto usage = vk::FormatFeatureFlagBits::eSampledImage | | ||
| 191 | vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc; | ||
| 192 | if (tuple.attachable) { | ||
| 193 | usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment | ||
| 194 | : vk::FormatFeatureFlagBits::eColorAttachment; | ||
| 195 | } | ||
| 196 | return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; | ||
| 197 | } | ||
| 198 | |||
| 199 | vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) { | ||
| 200 | switch (stage) { | ||
| 201 | case Maxwell::ShaderStage::Vertex: | ||
| 202 | return vk::ShaderStageFlagBits::eVertex; | ||
| 203 | case Maxwell::ShaderStage::TesselationControl: | ||
| 204 | return vk::ShaderStageFlagBits::eTessellationControl; | ||
| 205 | case Maxwell::ShaderStage::TesselationEval: | ||
| 206 | return vk::ShaderStageFlagBits::eTessellationEvaluation; | ||
| 207 | case Maxwell::ShaderStage::Geometry: | ||
| 208 | return vk::ShaderStageFlagBits::eGeometry; | ||
| 209 | case Maxwell::ShaderStage::Fragment: | ||
| 210 | return vk::ShaderStageFlagBits::eFragment; | ||
| 211 | } | ||
| 212 | UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); | ||
| 213 | return {}; | ||
| 214 | } | ||
| 215 | |||
| 216 | vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) { | ||
| 217 | switch (topology) { | ||
| 218 | case Maxwell::PrimitiveTopology::Points: | ||
| 219 | return vk::PrimitiveTopology::ePointList; | ||
| 220 | case Maxwell::PrimitiveTopology::Lines: | ||
| 221 | return vk::PrimitiveTopology::eLineList; | ||
| 222 | case Maxwell::PrimitiveTopology::LineStrip: | ||
| 223 | return vk::PrimitiveTopology::eLineStrip; | ||
| 224 | case Maxwell::PrimitiveTopology::Triangles: | ||
| 225 | return vk::PrimitiveTopology::eTriangleList; | ||
| 226 | case Maxwell::PrimitiveTopology::TriangleStrip: | ||
| 227 | return vk::PrimitiveTopology::eTriangleStrip; | ||
| 228 | } | ||
| 229 | UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); | ||
| 230 | return {}; | ||
| 231 | } | ||
| 232 | |||
| 233 | vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { | ||
| 234 | switch (type) { | ||
| 235 | case Maxwell::VertexAttribute::Type::SignedNorm: | ||
| 236 | break; | ||
| 237 | case Maxwell::VertexAttribute::Type::UnsignedNorm: | ||
| 238 | switch (size) { | ||
| 239 | case Maxwell::VertexAttribute::Size::Size_8_8_8_8: | ||
| 240 | return vk::Format::eR8G8B8A8Unorm; | ||
| 241 | default: | ||
| 242 | break; | ||
| 243 | } | ||
| 244 | break; | ||
| 245 | case Maxwell::VertexAttribute::Type::SignedInt: | ||
| 246 | break; | ||
| 247 | case Maxwell::VertexAttribute::Type::UnsignedInt: | ||
| 248 | switch (size) { | ||
| 249 | case Maxwell::VertexAttribute::Size::Size_32: | ||
| 250 | return vk::Format::eR32Uint; | ||
| 251 | default: | ||
| 252 | break; | ||
| 253 | } | ||
| 254 | case Maxwell::VertexAttribute::Type::UnsignedScaled: | ||
| 255 | case Maxwell::VertexAttribute::Type::SignedScaled: | ||
| 256 | break; | ||
| 257 | case Maxwell::VertexAttribute::Type::Float: | ||
| 258 | switch (size) { | ||
| 259 | case Maxwell::VertexAttribute::Size::Size_32_32_32_32: | ||
| 260 | return vk::Format::eR32G32B32A32Sfloat; | ||
| 261 | case Maxwell::VertexAttribute::Size::Size_32_32_32: | ||
| 262 | return vk::Format::eR32G32B32Sfloat; | ||
| 263 | case Maxwell::VertexAttribute::Size::Size_32_32: | ||
| 264 | return vk::Format::eR32G32Sfloat; | ||
| 265 | case Maxwell::VertexAttribute::Size::Size_32: | ||
| 266 | return vk::Format::eR32Sfloat; | ||
| 267 | default: | ||
| 268 | break; | ||
| 269 | } | ||
| 270 | break; | ||
| 271 | } | ||
| 272 | UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type), | ||
| 273 | static_cast<u32>(size)); | ||
| 274 | return {}; | ||
| 275 | } | ||
| 276 | |||
| 277 | vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { | ||
| 278 | switch (comparison) { | ||
| 279 | case Maxwell::ComparisonOp::Never: | ||
| 280 | case Maxwell::ComparisonOp::NeverOld: | ||
| 281 | return vk::CompareOp::eNever; | ||
| 282 | case Maxwell::ComparisonOp::Less: | ||
| 283 | case Maxwell::ComparisonOp::LessOld: | ||
| 284 | return vk::CompareOp::eLess; | ||
| 285 | case Maxwell::ComparisonOp::Equal: | ||
| 286 | case Maxwell::ComparisonOp::EqualOld: | ||
| 287 | return vk::CompareOp::eEqual; | ||
| 288 | case Maxwell::ComparisonOp::LessEqual: | ||
| 289 | case Maxwell::ComparisonOp::LessEqualOld: | ||
| 290 | return vk::CompareOp::eLessOrEqual; | ||
| 291 | case Maxwell::ComparisonOp::Greater: | ||
| 292 | case Maxwell::ComparisonOp::GreaterOld: | ||
| 293 | return vk::CompareOp::eGreater; | ||
| 294 | case Maxwell::ComparisonOp::NotEqual: | ||
| 295 | case Maxwell::ComparisonOp::NotEqualOld: | ||
| 296 | return vk::CompareOp::eNotEqual; | ||
| 297 | case Maxwell::ComparisonOp::GreaterEqual: | ||
| 298 | case Maxwell::ComparisonOp::GreaterEqualOld: | ||
| 299 | return vk::CompareOp::eGreaterOrEqual; | ||
| 300 | case Maxwell::ComparisonOp::Always: | ||
| 301 | case Maxwell::ComparisonOp::AlwaysOld: | ||
| 302 | return vk::CompareOp::eAlways; | ||
| 303 | } | ||
| 304 | UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); | ||
| 305 | return {}; | ||
| 306 | } | ||
| 307 | |||
| 308 | vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) { | ||
| 309 | switch (index_format) { | ||
| 310 | case Maxwell::IndexFormat::UnsignedByte: | ||
| 311 | UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format"); | ||
| 312 | return vk::IndexType::eUint16; | ||
| 313 | case Maxwell::IndexFormat::UnsignedShort: | ||
| 314 | return vk::IndexType::eUint16; | ||
| 315 | case Maxwell::IndexFormat::UnsignedInt: | ||
| 316 | return vk::IndexType::eUint32; | ||
| 317 | } | ||
| 318 | UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); | ||
| 319 | return {}; | ||
| 320 | } | ||
| 321 | |||
| 322 | vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { | ||
| 323 | switch (stencil_op) { | ||
| 324 | case Maxwell::StencilOp::Keep: | ||
| 325 | case Maxwell::StencilOp::KeepOGL: | ||
| 326 | return vk::StencilOp::eKeep; | ||
| 327 | case Maxwell::StencilOp::Zero: | ||
| 328 | case Maxwell::StencilOp::ZeroOGL: | ||
| 329 | return vk::StencilOp::eZero; | ||
| 330 | case Maxwell::StencilOp::Replace: | ||
| 331 | case Maxwell::StencilOp::ReplaceOGL: | ||
| 332 | return vk::StencilOp::eReplace; | ||
| 333 | case Maxwell::StencilOp::Incr: | ||
| 334 | case Maxwell::StencilOp::IncrOGL: | ||
| 335 | return vk::StencilOp::eIncrementAndClamp; | ||
| 336 | case Maxwell::StencilOp::Decr: | ||
| 337 | case Maxwell::StencilOp::DecrOGL: | ||
| 338 | return vk::StencilOp::eDecrementAndClamp; | ||
| 339 | case Maxwell::StencilOp::Invert: | ||
| 340 | case Maxwell::StencilOp::InvertOGL: | ||
| 341 | return vk::StencilOp::eInvert; | ||
| 342 | case Maxwell::StencilOp::IncrWrap: | ||
| 343 | case Maxwell::StencilOp::IncrWrapOGL: | ||
| 344 | return vk::StencilOp::eIncrementAndWrap; | ||
| 345 | case Maxwell::StencilOp::DecrWrap: | ||
| 346 | case Maxwell::StencilOp::DecrWrapOGL: | ||
| 347 | return vk::StencilOp::eDecrementAndWrap; | ||
| 348 | } | ||
| 349 | UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); | ||
| 350 | return {}; | ||
| 351 | } | ||
| 352 | |||
| 353 | vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { | ||
| 354 | switch (equation) { | ||
| 355 | case Maxwell::Blend::Equation::Add: | ||
| 356 | case Maxwell::Blend::Equation::AddGL: | ||
| 357 | return vk::BlendOp::eAdd; | ||
| 358 | case Maxwell::Blend::Equation::Subtract: | ||
| 359 | case Maxwell::Blend::Equation::SubtractGL: | ||
| 360 | return vk::BlendOp::eSubtract; | ||
| 361 | case Maxwell::Blend::Equation::ReverseSubtract: | ||
| 362 | case Maxwell::Blend::Equation::ReverseSubtractGL: | ||
| 363 | return vk::BlendOp::eReverseSubtract; | ||
| 364 | case Maxwell::Blend::Equation::Min: | ||
| 365 | case Maxwell::Blend::Equation::MinGL: | ||
| 366 | return vk::BlendOp::eMin; | ||
| 367 | case Maxwell::Blend::Equation::Max: | ||
| 368 | case Maxwell::Blend::Equation::MaxGL: | ||
| 369 | return vk::BlendOp::eMax; | ||
| 370 | } | ||
| 371 | UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); | ||
| 372 | return {}; | ||
| 373 | } | ||
| 374 | |||
| 375 | vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { | ||
| 376 | switch (factor) { | ||
| 377 | case Maxwell::Blend::Factor::Zero: | ||
| 378 | case Maxwell::Blend::Factor::ZeroGL: | ||
| 379 | return vk::BlendFactor::eZero; | ||
| 380 | case Maxwell::Blend::Factor::One: | ||
| 381 | case Maxwell::Blend::Factor::OneGL: | ||
| 382 | return vk::BlendFactor::eOne; | ||
| 383 | case Maxwell::Blend::Factor::SourceColor: | ||
| 384 | case Maxwell::Blend::Factor::SourceColorGL: | ||
| 385 | return vk::BlendFactor::eSrcColor; | ||
| 386 | case Maxwell::Blend::Factor::OneMinusSourceColor: | ||
| 387 | case Maxwell::Blend::Factor::OneMinusSourceColorGL: | ||
| 388 | return vk::BlendFactor::eOneMinusSrcColor; | ||
| 389 | case Maxwell::Blend::Factor::SourceAlpha: | ||
| 390 | case Maxwell::Blend::Factor::SourceAlphaGL: | ||
| 391 | return vk::BlendFactor::eSrcAlpha; | ||
| 392 | case Maxwell::Blend::Factor::OneMinusSourceAlpha: | ||
| 393 | case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: | ||
| 394 | return vk::BlendFactor::eOneMinusSrcAlpha; | ||
| 395 | case Maxwell::Blend::Factor::DestAlpha: | ||
| 396 | case Maxwell::Blend::Factor::DestAlphaGL: | ||
| 397 | return vk::BlendFactor::eDstAlpha; | ||
| 398 | case Maxwell::Blend::Factor::OneMinusDestAlpha: | ||
| 399 | case Maxwell::Blend::Factor::OneMinusDestAlphaGL: | ||
| 400 | return vk::BlendFactor::eOneMinusDstAlpha; | ||
| 401 | case Maxwell::Blend::Factor::DestColor: | ||
| 402 | case Maxwell::Blend::Factor::DestColorGL: | ||
| 403 | return vk::BlendFactor::eDstColor; | ||
| 404 | case Maxwell::Blend::Factor::OneMinusDestColor: | ||
| 405 | case Maxwell::Blend::Factor::OneMinusDestColorGL: | ||
| 406 | return vk::BlendFactor::eOneMinusDstColor; | ||
| 407 | case Maxwell::Blend::Factor::SourceAlphaSaturate: | ||
| 408 | case Maxwell::Blend::Factor::SourceAlphaSaturateGL: | ||
| 409 | return vk::BlendFactor::eSrcAlphaSaturate; | ||
| 410 | case Maxwell::Blend::Factor::Source1Color: | ||
| 411 | case Maxwell::Blend::Factor::Source1ColorGL: | ||
| 412 | return vk::BlendFactor::eSrc1Color; | ||
| 413 | case Maxwell::Blend::Factor::OneMinusSource1Color: | ||
| 414 | case Maxwell::Blend::Factor::OneMinusSource1ColorGL: | ||
| 415 | return vk::BlendFactor::eOneMinusSrc1Color; | ||
| 416 | case Maxwell::Blend::Factor::Source1Alpha: | ||
| 417 | case Maxwell::Blend::Factor::Source1AlphaGL: | ||
| 418 | return vk::BlendFactor::eSrc1Alpha; | ||
| 419 | case Maxwell::Blend::Factor::OneMinusSource1Alpha: | ||
| 420 | case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: | ||
| 421 | return vk::BlendFactor::eOneMinusSrc1Alpha; | ||
| 422 | case Maxwell::Blend::Factor::ConstantColor: | ||
| 423 | case Maxwell::Blend::Factor::ConstantColorGL: | ||
| 424 | return vk::BlendFactor::eConstantColor; | ||
| 425 | case Maxwell::Blend::Factor::OneMinusConstantColor: | ||
| 426 | case Maxwell::Blend::Factor::OneMinusConstantColorGL: | ||
| 427 | return vk::BlendFactor::eOneMinusConstantColor; | ||
| 428 | case Maxwell::Blend::Factor::ConstantAlpha: | ||
| 429 | case Maxwell::Blend::Factor::ConstantAlphaGL: | ||
| 430 | return vk::BlendFactor::eConstantAlpha; | ||
| 431 | case Maxwell::Blend::Factor::OneMinusConstantAlpha: | ||
| 432 | case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: | ||
| 433 | return vk::BlendFactor::eOneMinusConstantAlpha; | ||
| 434 | } | ||
| 435 | UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); | ||
| 436 | return {}; | ||
| 437 | } | ||
| 438 | |||
| 439 | vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { | ||
| 440 | switch (front_face) { | ||
| 441 | case Maxwell::Cull::FrontFace::ClockWise: | ||
| 442 | return vk::FrontFace::eClockwise; | ||
| 443 | case Maxwell::Cull::FrontFace::CounterClockWise: | ||
| 444 | return vk::FrontFace::eCounterClockwise; | ||
| 445 | } | ||
| 446 | UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); | ||
| 447 | return {}; | ||
| 448 | } | ||
| 449 | |||
| 450 | vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { | ||
| 451 | switch (cull_face) { | ||
| 452 | case Maxwell::Cull::CullFace::Front: | ||
| 453 | return vk::CullModeFlagBits::eFront; | ||
| 454 | case Maxwell::Cull::CullFace::Back: | ||
| 455 | return vk::CullModeFlagBits::eBack; | ||
| 456 | case Maxwell::Cull::CullFace::FrontAndBack: | ||
| 457 | return vk::CullModeFlagBits::eFrontAndBack; | ||
| 458 | } | ||
| 459 | UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); | ||
| 460 | return {}; | ||
| 461 | } | ||
| 462 | |||
| 463 | vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { | ||
| 464 | switch (swizzle) { | ||
| 465 | case Tegra::Texture::SwizzleSource::Zero: | ||
| 466 | return vk::ComponentSwizzle::eZero; | ||
| 467 | case Tegra::Texture::SwizzleSource::R: | ||
| 468 | return vk::ComponentSwizzle::eR; | ||
| 469 | case Tegra::Texture::SwizzleSource::G: | ||
| 470 | return vk::ComponentSwizzle::eG; | ||
| 471 | case Tegra::Texture::SwizzleSource::B: | ||
| 472 | return vk::ComponentSwizzle::eB; | ||
| 473 | case Tegra::Texture::SwizzleSource::A: | ||
| 474 | return vk::ComponentSwizzle::eA; | ||
| 475 | case Tegra::Texture::SwizzleSource::OneInt: | ||
| 476 | case Tegra::Texture::SwizzleSource::OneFloat: | ||
| 477 | return vk::ComponentSwizzle::eOne; | ||
| 478 | } | ||
| 479 | UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); | ||
| 480 | return {}; | ||
| 481 | } | ||
| 482 | |||
| 483 | } // namespace Vulkan::MaxwellToVK | ||
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h new file mode 100644 index 000000000..4cadc0721 --- /dev/null +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <utility> | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/maxwell_3d.h" | ||
| 10 | #include "video_core/renderer_vulkan/declarations.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 12 | #include "video_core/surface.h" | ||
| 13 | #include "video_core/textures/texture.h" | ||
| 14 | |||
// Declarations for translating Maxwell/Tegra GPU state enums into their Vulkan equivalents.
namespace Vulkan::MaxwellToVK {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using PixelFormat = VideoCore::Surface::PixelFormat;
using ComponentType = VideoCore::Surface::ComponentType;

// Translation helpers for guest sampler (texture sampling) state.
namespace Sampler {

/// Translates a guest texture filter into a Vulkan filter.
vk::Filter Filter(Tegra::Texture::TextureFilter filter);

/// Translates a guest mipmap filter into a Vulkan sampler mipmap mode.
vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter);

/// Translates a guest wrap mode into a Vulkan sampler address mode.
vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode);

/// Translates a guest depth compare function into a Vulkan compare operation.
vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func);

} // namespace Sampler

/// Resolves the device-supported Vulkan format for a guest pixel format; the bool in the
/// returned pair reports whether the format can be used as a framebuffer attachment.
std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
                                          PixelFormat pixel_format, ComponentType component_type);

/// Translates a Maxwell shader stage into a Vulkan shader stage flag.
vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);

/// Translates a Maxwell primitive topology into a Vulkan primitive topology.
vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology);

/// Translates a Maxwell vertex attribute type/size pair into a Vulkan vertex format.
vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size);

/// Translates a Maxwell comparison operation into a Vulkan compare operation.
vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison);

/// Translates a Maxwell index buffer format into a Vulkan index type.
vk::IndexType IndexFormat(Maxwell::IndexFormat index_format);

/// Translates a Maxwell stencil operation into a Vulkan stencil operation.
vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op);

/// Translates a Maxwell blend equation into a Vulkan blend operation.
vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation);

/// Translates a Maxwell blend factor into a Vulkan blend factor.
vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor);

/// Translates a Maxwell front-face winding order into a Vulkan front face.
vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face);

/// Translates a Maxwell cull face selection into Vulkan cull mode flags.
vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face);

/// Translates a guest texture swizzle source into a Vulkan component swizzle.
vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);

} // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 78a4e5f0e..00242ecbe 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp | |||
| @@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag | |||
| 122 | FormatType format_type) const { | 122 | FormatType format_type) const { |
| 123 | const auto it = format_properties.find(wanted_format); | 123 | const auto it = format_properties.find(wanted_format); |
| 124 | if (it == format_properties.end()) { | 124 | if (it == format_properties.end()) { |
| 125 | LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", | 125 | LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format)); |
| 126 | static_cast<u32>(wanted_format)); | ||
| 127 | UNREACHABLE(); | 126 | UNREACHABLE(); |
| 128 | return true; | 127 | return true; |
| 129 | } | 128 | } |
| @@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( | |||
| 219 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); | 218 | format_properties.emplace(format, physical.getFormatProperties(format, dldi)); |
| 220 | }; | 219 | }; |
| 221 | AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); | 220 | AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); |
| 222 | AddFormatQuery(vk::Format::eR5G6B5UnormPack16); | 221 | AddFormatQuery(vk::Format::eB5G6R5UnormPack16); |
| 222 | AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); | ||
| 223 | AddFormatQuery(vk::Format::eR8G8B8A8Srgb); | ||
| 224 | AddFormatQuery(vk::Format::eR8Unorm); | ||
| 223 | AddFormatQuery(vk::Format::eD32Sfloat); | 225 | AddFormatQuery(vk::Format::eD32Sfloat); |
| 226 | AddFormatQuery(vk::Format::eD16Unorm); | ||
| 224 | AddFormatQuery(vk::Format::eD16UnormS8Uint); | 227 | AddFormatQuery(vk::Format::eD16UnormS8Uint); |
| 225 | AddFormatQuery(vk::Format::eD24UnormS8Uint); | 228 | AddFormatQuery(vk::Format::eD24UnormS8Uint); |
| 226 | AddFormatQuery(vk::Format::eD32SfloatS8Uint); | 229 | AddFormatQuery(vk::Format::eD32SfloatS8Uint); |
| 230 | AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); | ||
| 231 | AddFormatQuery(vk::Format::eBc2UnormBlock); | ||
| 232 | AddFormatQuery(vk::Format::eBc3UnormBlock); | ||
| 233 | AddFormatQuery(vk::Format::eBc4UnormBlock); | ||
| 227 | 234 | ||
| 228 | return format_properties; | 235 | return format_properties; |
| 229 | } | 236 | } |
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 740ac3118..e4c438792 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 165 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | 165 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, |
| 166 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | 166 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, |
| 167 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | 167 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, |
| 168 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | ||
| 168 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | 169 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, |
| 169 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | 170 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, |
| 170 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | 171 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 38f01ca50..ea3c71eed 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -17,24 +17,6 @@ using Tegra::Shader::Attribute; | |||
| 17 | using Tegra::Shader::Instruction; | 17 | using Tegra::Shader::Instruction; |
| 18 | using Tegra::Shader::OpCode; | 18 | using Tegra::Shader::OpCode; |
| 19 | using Tegra::Shader::Register; | 19 | using Tegra::Shader::Register; |
| 20 | using Tegra::Shader::TextureMiscMode; | ||
| 21 | using Tegra::Shader::TextureProcessMode; | ||
| 22 | using Tegra::Shader::TextureType; | ||
| 23 | |||
| 24 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 25 | switch (texture_type) { | ||
| 26 | case TextureType::Texture1D: | ||
| 27 | return 1; | ||
| 28 | case TextureType::Texture2D: | ||
| 29 | return 2; | ||
| 30 | case TextureType::Texture3D: | ||
| 31 | case TextureType::TextureCube: | ||
| 32 | return 3; | ||
| 33 | default: | ||
| 34 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | 20 | ||
| 39 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | 21 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
| 40 | const Instruction instr = {program_code[pc]}; | 22 | const Instruction instr = {program_code[pc]}; |
| @@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 247 | } | 229 | } |
| 248 | break; | 230 | break; |
| 249 | } | 231 | } |
| 250 | case OpCode::Id::TEX: { | ||
| 251 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 252 | "AOFFI is not implemented"); | ||
| 253 | |||
| 254 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 255 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||
| 256 | } | ||
| 257 | |||
| 258 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 259 | const bool is_array = instr.tex.array != 0; | ||
| 260 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 261 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 262 | WriteTexInstructionFloat( | ||
| 263 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | ||
| 264 | break; | ||
| 265 | } | ||
| 266 | case OpCode::Id::TEXS: { | ||
| 267 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 268 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 269 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 270 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 271 | |||
| 272 | if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 273 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | ||
| 274 | } | ||
| 275 | |||
| 276 | const Node4 components = | ||
| 277 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 278 | |||
| 279 | if (instr.texs.fp32_flag) { | ||
| 280 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 281 | } else { | ||
| 282 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 283 | } | ||
| 284 | break; | ||
| 285 | } | ||
| 286 | case OpCode::Id::TLD4: { | ||
| 287 | ASSERT(instr.tld4.array == 0); | ||
| 288 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 289 | "AOFFI is not implemented"); | ||
| 290 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 291 | "NDV is not implemented"); | ||
| 292 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||
| 293 | "PTP is not implemented"); | ||
| 294 | |||
| 295 | if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 296 | LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); | ||
| 297 | } | ||
| 298 | |||
| 299 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 300 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 301 | const bool is_array = instr.tld4.array != 0; | ||
| 302 | WriteTexInstructionFloat(bb, instr, | ||
| 303 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | ||
| 304 | break; | ||
| 305 | } | ||
| 306 | case OpCode::Id::TLD4S: { | ||
| 307 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 308 | "AOFFI is not implemented"); | ||
| 309 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 310 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | ||
| 311 | } | ||
| 312 | |||
| 313 | const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 314 | const Node op_a = GetRegister(instr.gpr8); | ||
| 315 | const Node op_b = GetRegister(instr.gpr20); | ||
| 316 | |||
| 317 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 318 | std::vector<Node> coords; | ||
| 319 | if (depth_compare) { | ||
| 320 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 321 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 322 | coords.push_back(op_a); | ||
| 323 | coords.push_back(op_y); | ||
| 324 | coords.push_back(op_b); | ||
| 325 | } else { | ||
| 326 | coords.push_back(op_a); | ||
| 327 | coords.push_back(op_b); | ||
| 328 | } | ||
| 329 | std::vector<Node> extras; | ||
| 330 | extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | ||
| 331 | |||
| 332 | const auto& sampler = | ||
| 333 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | ||
| 334 | |||
| 335 | Node4 values; | ||
| 336 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 337 | auto coords_copy = coords; | ||
| 338 | MetaTexture meta{sampler, {}, {}, extras, element}; | ||
| 339 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 340 | } | ||
| 341 | |||
| 342 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 343 | break; | ||
| 344 | } | ||
| 345 | case OpCode::Id::TXQ: { | ||
| 346 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 347 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); | ||
| 348 | } | ||
| 349 | |||
| 350 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 351 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 352 | // uses. This must be fixed at a later instance. | ||
| 353 | const auto& sampler = | ||
| 354 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 355 | |||
| 356 | u32 indexer = 0; | ||
| 357 | switch (instr.txq.query_type) { | ||
| 358 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 359 | for (u32 element = 0; element < 4; ++element) { | ||
| 360 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 361 | continue; | ||
| 362 | } | ||
| 363 | MetaTexture meta{sampler, {}, {}, {}, element}; | ||
| 364 | const Node value = | ||
| 365 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||
| 366 | SetTemporal(bb, indexer++, value); | ||
| 367 | } | ||
| 368 | for (u32 i = 0; i < indexer; ++i) { | ||
| 369 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 370 | } | ||
| 371 | break; | ||
| 372 | } | ||
| 373 | default: | ||
| 374 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 375 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 376 | } | ||
| 377 | break; | ||
| 378 | } | ||
| 379 | case OpCode::Id::TMML: { | ||
| 380 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 381 | "NDV is not implemented"); | ||
| 382 | |||
| 383 | if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 384 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 385 | } | ||
| 386 | |||
| 387 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 388 | const bool is_array = instr.tmml.array != 0; | ||
| 389 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 390 | |||
| 391 | std::vector<Node> coords; | ||
| 392 | |||
| 393 | // TODO: Add coordinates for different samplers once other texture types are implemented. | ||
| 394 | switch (texture_type) { | ||
| 395 | case TextureType::Texture1D: | ||
| 396 | coords.push_back(GetRegister(instr.gpr8)); | ||
| 397 | break; | ||
| 398 | case TextureType::Texture2D: | ||
| 399 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 400 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 401 | break; | ||
| 402 | default: | ||
| 403 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 404 | |||
| 405 | // Fallback to interpreting as a 2D texture for now | ||
| 406 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 407 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 408 | texture_type = TextureType::Texture2D; | ||
| 409 | } | ||
| 410 | |||
| 411 | for (u32 element = 0; element < 2; ++element) { | ||
| 412 | auto params = coords; | ||
| 413 | MetaTexture meta{sampler, {}, {}, {}, element}; | ||
| 414 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | ||
| 415 | SetTemporal(bb, element, value); | ||
| 416 | } | ||
| 417 | for (u32 element = 0; element < 2; ++element) { | ||
| 418 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 419 | } | ||
| 420 | |||
| 421 | break; | ||
| 422 | } | ||
| 423 | case OpCode::Id::TLDS: { | ||
| 424 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 425 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 426 | |||
| 427 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 428 | "AOFFI is not implemented"); | ||
| 429 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 430 | |||
| 431 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 432 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); | ||
| 433 | } | ||
| 434 | |||
| 435 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | ||
| 436 | break; | ||
| 437 | } | ||
| 438 | default: | 232 | default: |
| 439 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | 233 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); |
| 440 | } | 234 | } |
| @@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 442 | return pc; | 236 | return pc; |
| 443 | } | 237 | } |
| 444 | 238 | ||
| 445 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | ||
| 446 | bool is_array, bool is_shadow) { | ||
| 447 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 448 | |||
| 449 | // If this sampler has already been used, return the existing mapping. | ||
| 450 | const auto itr = | ||
| 451 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 452 | [&](const Sampler& entry) { return entry.GetOffset() == offset; }); | ||
| 453 | if (itr != used_samplers.end()) { | ||
| 454 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 455 | itr->IsShadow() == is_shadow); | ||
| 456 | return *itr; | ||
| 457 | } | ||
| 458 | |||
| 459 | // Otherwise create a new mapping for this sampler | ||
| 460 | const std::size_t next_index = used_samplers.size(); | ||
| 461 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; | ||
| 462 | return *used_samplers.emplace(entry).first; | ||
| 463 | } | ||
| 464 | |||
| 465 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||
| 466 | u32 dest_elem = 0; | ||
| 467 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 468 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 469 | // Skip disabled components | ||
| 470 | continue; | ||
| 471 | } | ||
| 472 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 473 | } | ||
| 474 | // After writing values in temporals, move them to the real registers | ||
| 475 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 476 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 477 | } | ||
| 478 | } | ||
| 479 | |||
| 480 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | ||
| 481 | const Node4& components) { | ||
| 482 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 483 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 484 | |||
| 485 | u32 dest_elem = 0; | ||
| 486 | for (u32 component = 0; component < 4; ++component) { | ||
| 487 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 488 | continue; | ||
| 489 | SetTemporal(bb, dest_elem++, components[component]); | ||
| 490 | } | ||
| 491 | |||
| 492 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 493 | if (i < 2) { | ||
| 494 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 495 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | ||
| 496 | } else { | ||
| 497 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 498 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 499 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | ||
| 500 | } | ||
| 501 | } | ||
| 502 | } | ||
| 503 | |||
| 504 | void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||
| 505 | const Node4& components) { | ||
| 506 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 507 | // float instruction). | ||
| 508 | |||
| 509 | Node4 values; | ||
| 510 | u32 dest_elem = 0; | ||
| 511 | for (u32 component = 0; component < 4; ++component) { | ||
| 512 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 513 | continue; | ||
| 514 | values[dest_elem++] = components[component]; | ||
| 515 | } | ||
| 516 | if (dest_elem == 0) | ||
| 517 | return; | ||
| 518 | |||
| 519 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 520 | |||
| 521 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 522 | if (dest_elem <= 2) { | ||
| 523 | SetRegister(bb, instr.gpr0, first_value); | ||
| 524 | return; | ||
| 525 | } | ||
| 526 | |||
| 527 | SetTemporal(bb, 0, first_value); | ||
| 528 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 529 | |||
| 530 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 531 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 532 | } | ||
| 533 | |||
| 534 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||
| 535 | TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 536 | Node array, Node depth_compare, u32 bias_offset) { | ||
| 537 | const bool is_array = array; | ||
| 538 | const bool is_shadow = depth_compare; | ||
| 539 | |||
| 540 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || | ||
| 541 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | ||
| 542 | "This method is not supported."); | ||
| 543 | |||
| 544 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); | ||
| 545 | |||
| 546 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | ||
| 547 | process_mode == TextureProcessMode::LL || | ||
| 548 | process_mode == TextureProcessMode::LLA; | ||
| 549 | |||
| 550 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | ||
| 551 | // sampler2DArrayShadow and samplerCubeArrayShadow. | ||
| 552 | const bool gl_lod_supported = | ||
| 553 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || | ||
| 554 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); | ||
| 555 | |||
| 556 | const OperationCode read_method = | ||
| 557 | lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture; | ||
| 558 | |||
| 559 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); | ||
| 560 | |||
| 561 | std::vector<Node> extras; | ||
| 562 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | ||
| 563 | if (process_mode == TextureProcessMode::LZ) { | ||
| 564 | extras.push_back(Immediate(0.0f)); | ||
| 565 | } else { | ||
| 566 | // If present, lod or bias are always stored in the register indexed by the gpr20 | ||
| 567 | // field with an offset depending on the usage of the other registers | ||
| 568 | extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); | ||
| 569 | } | ||
| 570 | } | ||
| 571 | |||
| 572 | Node4 values; | ||
| 573 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 574 | auto copy_coords = coords; | ||
| 575 | MetaTexture meta{sampler, array, depth_compare, extras, element}; | ||
| 576 | values[element] = Operation(read_method, meta, std::move(copy_coords)); | ||
| 577 | } | ||
| 578 | |||
| 579 | return values; | ||
| 580 | } | ||
| 581 | |||
| 582 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||
| 583 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 584 | const bool lod_bias_enabled = | ||
| 585 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 586 | |||
| 587 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 588 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||
| 589 | // If enabled arrays index is always stored in the gpr8 field | ||
| 590 | const u64 array_register = instr.gpr8.Value(); | ||
| 591 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 592 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 593 | |||
| 594 | std::vector<Node> coords; | ||
| 595 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 596 | coords.push_back(GetRegister(coord_register + i)); | ||
| 597 | } | ||
| 598 | // 1D.DC in OpenGL the 2nd component is ignored. | ||
| 599 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | ||
| 600 | coords.push_back(Immediate(0.0f)); | ||
| 601 | } | ||
| 602 | |||
| 603 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 604 | |||
| 605 | Node dc{}; | ||
| 606 | if (depth_compare) { | ||
| 607 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 608 | // or bias are used | ||
| 609 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 610 | dc = GetRegister(depth_register); | ||
| 611 | } | ||
| 612 | |||
| 613 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); | ||
| 614 | } | ||
| 615 | |||
| 616 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||
| 617 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 618 | const bool lod_bias_enabled = | ||
| 619 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 620 | |||
| 621 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 622 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); | ||
| 623 | // If enabled arrays index is always stored in the gpr8 field | ||
| 624 | const u64 array_register = instr.gpr8.Value(); | ||
| 625 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used | ||
| 626 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 627 | const u64 last_coord_register = | ||
| 628 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 629 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 630 | : coord_register + 1; | ||
| 631 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 632 | |||
| 633 | std::vector<Node> coords; | ||
| 634 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 635 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 636 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 637 | } | ||
| 638 | |||
| 639 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 640 | |||
| 641 | Node dc{}; | ||
| 642 | if (depth_compare) { | ||
| 643 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 644 | // or bias are used | ||
| 645 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 646 | dc = GetRegister(depth_register); | ||
| 647 | } | ||
| 648 | |||
| 649 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); | ||
| 650 | } | ||
| 651 | |||
| 652 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 653 | bool is_array) { | ||
| 654 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 655 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 656 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 657 | |||
| 658 | // If enabled arrays index is always stored in the gpr8 field | ||
| 659 | const u64 array_register = instr.gpr8.Value(); | ||
| 660 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 661 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 662 | |||
| 663 | std::vector<Node> coords; | ||
| 664 | for (size_t i = 0; i < coord_count; ++i) | ||
| 665 | coords.push_back(GetRegister(coord_register + i)); | ||
| 666 | |||
| 667 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 668 | |||
| 669 | Node4 values; | ||
| 670 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 671 | auto coords_copy = coords; | ||
| 672 | MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element}; | ||
| 673 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 674 | } | ||
| 675 | |||
| 676 | return values; | ||
| 677 | } | ||
| 678 | |||
| 679 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 680 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 681 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 682 | |||
| 683 | // If enabled arrays index is always stored in the gpr8 field | ||
| 684 | const u64 array_register = instr.gpr8.Value(); | ||
| 685 | // if is array gpr20 is used | ||
| 686 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 687 | |||
| 688 | const u64 last_coord_register = | ||
| 689 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 690 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 691 | : coord_register + 1; | ||
| 692 | |||
| 693 | std::vector<Node> coords; | ||
| 694 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 695 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 696 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 697 | } | ||
| 698 | |||
| 699 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 700 | // When lod is used always is in gpr20 | ||
| 701 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||
| 702 | |||
| 703 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 704 | |||
| 705 | Node4 values; | ||
| 706 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 707 | auto coords_copy = coords; | ||
| 708 | MetaTexture meta{sampler, array, {}, {lod}, element}; | ||
| 709 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 710 | } | ||
| 711 | return values; | ||
| 712 | } | ||
| 713 | |||
| 714 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 715 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 716 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 717 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 718 | |||
| 719 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 720 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 721 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 722 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 723 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 724 | } | ||
| 725 | // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. | ||
| 726 | total_coord_count += | ||
| 727 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 728 | |||
| 729 | return {coord_count, total_coord_count}; | ||
| 730 | } | ||
| 731 | |||
| 732 | } // namespace VideoCommon::Shader | 239 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp new file mode 100644 index 000000000..a99ae19bf --- /dev/null +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -0,0 +1,534 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | using Tegra::Shader::Register; | ||
| 19 | using Tegra::Shader::TextureMiscMode; | ||
| 20 | using Tegra::Shader::TextureProcessMode; | ||
| 21 | using Tegra::Shader::TextureType; | ||
| 22 | |||
| 23 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 24 | switch (texture_type) { | ||
| 25 | case TextureType::Texture1D: | ||
| 26 | return 1; | ||
| 27 | case TextureType::Texture2D: | ||
| 28 | return 2; | ||
| 29 | case TextureType::Texture3D: | ||
| 30 | case TextureType::TextureCube: | ||
| 31 | return 3; | ||
| 32 | default: | ||
| 33 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 34 | return 0; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||
| 39 | const Instruction instr = {program_code[pc]}; | ||
| 40 | const auto opcode = OpCode::Decode(instr); | ||
| 41 | |||
| 42 | switch (opcode->get().GetId()) { | ||
| 43 | case OpCode::Id::TEX: { | ||
| 44 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 45 | "AOFFI is not implemented"); | ||
| 46 | |||
| 47 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 48 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||
| 49 | } | ||
| 50 | |||
| 51 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 52 | const bool is_array = instr.tex.array != 0; | ||
| 53 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 54 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 55 | WriteTexInstructionFloat( | ||
| 56 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | ||
| 57 | break; | ||
| 58 | } | ||
| 59 | case OpCode::Id::TEXS: { | ||
| 60 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 61 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 62 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 63 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 64 | |||
| 65 | if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 66 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | ||
| 67 | } | ||
| 68 | |||
| 69 | const Node4 components = | ||
| 70 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 71 | |||
| 72 | if (instr.texs.fp32_flag) { | ||
| 73 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 74 | } else { | ||
| 75 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 76 | } | ||
| 77 | break; | ||
| 78 | } | ||
| 79 | case OpCode::Id::TLD4: { | ||
| 80 | ASSERT(instr.tld4.array == 0); | ||
| 81 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 82 | "AOFFI is not implemented"); | ||
| 83 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 84 | "NDV is not implemented"); | ||
| 85 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||
| 86 | "PTP is not implemented"); | ||
| 87 | |||
| 88 | if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 89 | LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); | ||
| 90 | } | ||
| 91 | |||
| 92 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 93 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 94 | const bool is_array = instr.tld4.array != 0; | ||
| 95 | WriteTexInstructionFloat(bb, instr, | ||
| 96 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | ||
| 97 | break; | ||
| 98 | } | ||
| 99 | case OpCode::Id::TLD4S: { | ||
| 100 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 101 | "AOFFI is not implemented"); | ||
| 102 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 103 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | ||
| 104 | } | ||
| 105 | |||
| 106 | const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 107 | const Node op_a = GetRegister(instr.gpr8); | ||
| 108 | const Node op_b = GetRegister(instr.gpr20); | ||
| 109 | |||
| 110 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 111 | std::vector<Node> coords; | ||
| 112 | if (depth_compare) { | ||
| 113 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 114 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 115 | coords.push_back(op_a); | ||
| 116 | coords.push_back(op_y); | ||
| 117 | coords.push_back(op_b); | ||
| 118 | } else { | ||
| 119 | coords.push_back(op_a); | ||
| 120 | coords.push_back(op_b); | ||
| 121 | } | ||
| 122 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | ||
| 123 | |||
| 124 | const auto& sampler = | ||
| 125 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | ||
| 126 | |||
| 127 | Node4 values; | ||
| 128 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 129 | auto coords_copy = coords; | ||
| 130 | MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; | ||
| 131 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 132 | } | ||
| 133 | |||
| 134 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 135 | break; | ||
| 136 | } | ||
| 137 | case OpCode::Id::TXQ: { | ||
| 138 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 139 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); | ||
| 140 | } | ||
| 141 | |||
| 142 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 143 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 144 | // uses. This must be fixed at a later instance. | ||
| 145 | const auto& sampler = | ||
| 146 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 147 | |||
| 148 | u32 indexer = 0; | ||
| 149 | switch (instr.txq.query_type) { | ||
| 150 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 151 | for (u32 element = 0; element < 4; ++element) { | ||
| 152 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 153 | continue; | ||
| 154 | } | ||
| 155 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | ||
| 156 | const Node value = | ||
| 157 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||
| 158 | SetTemporal(bb, indexer++, value); | ||
| 159 | } | ||
| 160 | for (u32 i = 0; i < indexer; ++i) { | ||
| 161 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 162 | } | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | default: | ||
| 166 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 167 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 168 | } | ||
| 169 | break; | ||
| 170 | } | ||
| 171 | case OpCode::Id::TMML: { | ||
| 172 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 173 | "NDV is not implemented"); | ||
| 174 | |||
| 175 | if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 176 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 177 | } | ||
| 178 | |||
| 179 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 180 | const bool is_array = instr.tmml.array != 0; | ||
| 181 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 182 | |||
| 183 | std::vector<Node> coords; | ||
| 184 | |||
| 185 | // TODO: Add coordinates for different samplers once other texture types are implemented. | ||
| 186 | switch (texture_type) { | ||
| 187 | case TextureType::Texture1D: | ||
| 188 | coords.push_back(GetRegister(instr.gpr8)); | ||
| 189 | break; | ||
| 190 | case TextureType::Texture2D: | ||
| 191 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 192 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 193 | break; | ||
| 194 | default: | ||
| 195 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 196 | |||
| 197 | // Fallback to interpreting as a 2D texture for now | ||
| 198 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 199 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 200 | texture_type = TextureType::Texture2D; | ||
| 201 | } | ||
| 202 | |||
| 203 | for (u32 element = 0; element < 2; ++element) { | ||
| 204 | auto params = coords; | ||
| 205 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | ||
| 206 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | ||
| 207 | SetTemporal(bb, element, value); | ||
| 208 | } | ||
| 209 | for (u32 element = 0; element < 2; ++element) { | ||
| 210 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 211 | } | ||
| 212 | |||
| 213 | break; | ||
| 214 | } | ||
| 215 | case OpCode::Id::TLDS: { | ||
| 216 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 217 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 218 | |||
| 219 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 220 | "AOFFI is not implemented"); | ||
| 221 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 222 | |||
| 223 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 224 | LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); | ||
| 225 | } | ||
| 226 | |||
| 227 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | ||
| 228 | break; | ||
| 229 | } | ||
| 230 | default: | ||
| 231 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 232 | } | ||
| 233 | |||
| 234 | return pc; | ||
| 235 | } | ||
| 236 | |||
| 237 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | ||
| 238 | bool is_array, bool is_shadow) { | ||
| 239 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 240 | |||
| 241 | // If this sampler has already been used, return the existing mapping. | ||
| 242 | const auto itr = | ||
| 243 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 244 | [&](const Sampler& entry) { return entry.GetOffset() == offset; }); | ||
| 245 | if (itr != used_samplers.end()) { | ||
| 246 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 247 | itr->IsShadow() == is_shadow); | ||
| 248 | return *itr; | ||
| 249 | } | ||
| 250 | |||
| 251 | // Otherwise create a new mapping for this sampler | ||
| 252 | const std::size_t next_index = used_samplers.size(); | ||
| 253 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; | ||
| 254 | return *used_samplers.emplace(entry).first; | ||
| 255 | } | ||
| 256 | |||
| 257 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||
| 258 | u32 dest_elem = 0; | ||
| 259 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 260 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 261 | // Skip disabled components | ||
| 262 | continue; | ||
| 263 | } | ||
| 264 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 265 | } | ||
| 266 | // After writing values in temporals, move them to the real registers | ||
| 267 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 268 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 269 | } | ||
| 270 | } | ||
| 271 | |||
| 272 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, | ||
| 273 | const Node4& components) { | ||
| 274 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 275 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 276 | |||
| 277 | u32 dest_elem = 0; | ||
| 278 | for (u32 component = 0; component < 4; ++component) { | ||
| 279 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 280 | continue; | ||
| 281 | SetTemporal(bb, dest_elem++, components[component]); | ||
| 282 | } | ||
| 283 | |||
| 284 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 285 | if (i < 2) { | ||
| 286 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 287 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | ||
| 288 | } else { | ||
| 289 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 290 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 291 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | ||
| 292 | } | ||
| 293 | } | ||
| 294 | } | ||
| 295 | |||
| 296 | void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||
| 297 | const Node4& components) { | ||
| 298 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 299 | // float instruction). | ||
| 300 | |||
| 301 | Node4 values; | ||
| 302 | u32 dest_elem = 0; | ||
| 303 | for (u32 component = 0; component < 4; ++component) { | ||
| 304 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 305 | continue; | ||
| 306 | values[dest_elem++] = components[component]; | ||
| 307 | } | ||
| 308 | if (dest_elem == 0) | ||
| 309 | return; | ||
| 310 | |||
| 311 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 312 | |||
| 313 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 314 | if (dest_elem <= 2) { | ||
| 315 | SetRegister(bb, instr.gpr0, first_value); | ||
| 316 | return; | ||
| 317 | } | ||
| 318 | |||
| 319 | SetTemporal(bb, 0, first_value); | ||
| 320 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 321 | |||
| 322 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 323 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 324 | } | ||
| 325 | |||
| 326 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||
| 327 | TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 328 | Node array, Node depth_compare, u32 bias_offset) { | ||
| 329 | const bool is_array = array; | ||
| 330 | const bool is_shadow = depth_compare; | ||
| 331 | |||
| 332 | UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || | ||
| 333 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | ||
| 334 | "This method is not supported."); | ||
| 335 | |||
| 336 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); | ||
| 337 | |||
| 338 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | ||
| 339 | process_mode == TextureProcessMode::LL || | ||
| 340 | process_mode == TextureProcessMode::LLA; | ||
| 341 | |||
| 342 | // LOD selection (either via bias or explicit textureLod) not supported in GL for | ||
| 343 | // sampler2DArrayShadow and samplerCubeArrayShadow. | ||
| 344 | const bool gl_lod_supported = | ||
| 345 | !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || | ||
| 346 | (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); | ||
| 347 | |||
| 348 | const OperationCode read_method = | ||
| 349 | (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture; | ||
| 350 | |||
| 351 | UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); | ||
| 352 | |||
| 353 | Node bias = {}; | ||
| 354 | Node lod = {}; | ||
| 355 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | ||
| 356 | switch (process_mode) { | ||
| 357 | case TextureProcessMode::LZ: | ||
| 358 | lod = Immediate(0.0f); | ||
| 359 | break; | ||
| 360 | case TextureProcessMode::LB: | ||
| 361 | // If present, lod or bias are always stored in the register indexed by the gpr20 | ||
| 362 | // field with an offset depending on the usage of the other registers | ||
| 363 | bias = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 364 | break; | ||
| 365 | case TextureProcessMode::LL: | ||
| 366 | lod = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 367 | break; | ||
| 368 | default: | ||
| 369 | UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); | ||
| 370 | break; | ||
| 371 | } | ||
| 372 | } | ||
| 373 | |||
| 374 | Node4 values; | ||
| 375 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 376 | auto copy_coords = coords; | ||
| 377 | MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; | ||
| 378 | values[element] = Operation(read_method, meta, std::move(copy_coords)); | ||
| 379 | } | ||
| 380 | |||
| 381 | return values; | ||
| 382 | } | ||
| 383 | |||
| 384 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||
| 385 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 386 | const bool lod_bias_enabled = | ||
| 387 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 388 | |||
| 389 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 390 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||
| 391 | // If enabled arrays index is always stored in the gpr8 field | ||
| 392 | const u64 array_register = instr.gpr8.Value(); | ||
| 393 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 394 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 395 | |||
| 396 | std::vector<Node> coords; | ||
| 397 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 398 | coords.push_back(GetRegister(coord_register + i)); | ||
| 399 | } | ||
| 400 | // 1D.DC in OpenGL the 2nd component is ignored. | ||
| 401 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | ||
| 402 | coords.push_back(Immediate(0.0f)); | ||
| 403 | } | ||
| 404 | |||
| 405 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 406 | |||
| 407 | Node dc{}; | ||
| 408 | if (depth_compare) { | ||
| 409 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 410 | // or bias are used | ||
| 411 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 412 | dc = GetRegister(depth_register); | ||
| 413 | } | ||
| 414 | |||
| 415 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); | ||
| 416 | } | ||
| 417 | |||
| 418 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||
| 419 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 420 | const bool lod_bias_enabled = | ||
| 421 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 422 | |||
| 423 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 424 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); | ||
| 425 | // If enabled arrays index is always stored in the gpr8 field | ||
| 426 | const u64 array_register = instr.gpr8.Value(); | ||
| 427 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used | ||
| 428 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 429 | const u64 last_coord_register = | ||
| 430 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 431 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 432 | : coord_register + 1; | ||
| 433 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 434 | |||
| 435 | std::vector<Node> coords; | ||
| 436 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 437 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 438 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 439 | } | ||
| 440 | |||
| 441 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 442 | |||
| 443 | Node dc{}; | ||
| 444 | if (depth_compare) { | ||
| 445 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 446 | // or bias are used | ||
| 447 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 448 | dc = GetRegister(depth_register); | ||
| 449 | } | ||
| 450 | |||
| 451 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); | ||
| 452 | } | ||
| 453 | |||
| 454 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 455 | bool is_array) { | ||
| 456 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 457 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 458 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 459 | |||
| 460 | // If enabled arrays index is always stored in the gpr8 field | ||
| 461 | const u64 array_register = instr.gpr8.Value(); | ||
| 462 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 463 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 464 | |||
| 465 | std::vector<Node> coords; | ||
| 466 | for (size_t i = 0; i < coord_count; ++i) | ||
| 467 | coords.push_back(GetRegister(coord_register + i)); | ||
| 468 | |||
| 469 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 470 | |||
| 471 | Node4 values; | ||
| 472 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 473 | auto coords_copy = coords; | ||
| 474 | MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; | ||
| 475 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 476 | } | ||
| 477 | |||
| 478 | return values; | ||
| 479 | } | ||
| 480 | |||
| 481 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 482 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 483 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 484 | |||
| 485 | // If enabled arrays index is always stored in the gpr8 field | ||
| 486 | const u64 array_register = instr.gpr8.Value(); | ||
| 487 | // if is array gpr20 is used | ||
| 488 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 489 | |||
| 490 | const u64 last_coord_register = | ||
| 491 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 492 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 493 | : coord_register + 1; | ||
| 494 | |||
| 495 | std::vector<Node> coords; | ||
| 496 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 497 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 498 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 499 | } | ||
| 500 | |||
| 501 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 502 | // When lod is used always is in gpr20 | ||
| 503 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||
| 504 | |||
| 505 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 506 | |||
| 507 | Node4 values; | ||
| 508 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 509 | auto coords_copy = coords; | ||
| 510 | MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; | ||
| 511 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 512 | } | ||
| 513 | return values; | ||
| 514 | } | ||
| 515 | |||
| 516 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 517 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 518 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 519 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 520 | |||
| 521 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 522 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 523 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 524 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 525 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 526 | } | ||
| 527 | // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. | ||
| 528 | total_coord_count += | ||
| 529 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 530 | |||
| 531 | return {coord_count, total_coord_count}; | ||
| 532 | } | ||
| 533 | |||
| 534 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 52c7f2c4e..5bc3a3900 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -290,7 +290,9 @@ struct MetaTexture { | |||
| 290 | const Sampler& sampler; | 290 | const Sampler& sampler; |
| 291 | Node array{}; | 291 | Node array{}; |
| 292 | Node depth_compare{}; | 292 | Node depth_compare{}; |
| 293 | std::vector<Node> extras; | 293 | Node bias{}; |
| 294 | Node lod{}; | ||
| 295 | Node component{}; | ||
| 294 | u32 element{}; | 296 | u32 element{}; |
| 295 | }; | 297 | }; |
| 296 | 298 | ||
| @@ -614,6 +616,7 @@ private: | |||
| 614 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); | 616 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); |
| 615 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | 617 | u32 DecodeConversion(NodeBlock& bb, u32 pc); |
| 616 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | 618 | u32 DecodeMemory(NodeBlock& bb, u32 pc); |
| 619 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | ||
| 617 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); | 620 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); |
| 618 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); | 621 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); |
| 619 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); | 622 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); |
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index bc50a4876..b508d64e9 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp | |||
| @@ -23,28 +23,12 @@ | |||
| 23 | 23 | ||
| 24 | #include "video_core/textures/astc.h" | 24 | #include "video_core/textures/astc.h" |
| 25 | 25 | ||
| 26 | class BitStream { | 26 | class InputBitStream { |
| 27 | public: | 27 | public: |
| 28 | explicit BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | 28 | explicit InputBitStream(const unsigned char* ptr, int nBits = 0, int start_offset = 0) |
| 29 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | 29 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} |
| 30 | 30 | ||
| 31 | ~BitStream() = default; | 31 | ~InputBitStream() = default; |
| 32 | |||
| 33 | int GetBitsWritten() const { | ||
| 34 | return m_BitsWritten; | ||
| 35 | } | ||
| 36 | |||
| 37 | void WriteBitsR(unsigned int val, unsigned int nBits) { | ||
| 38 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 39 | WriteBit((val >> (nBits - i - 1)) & 1); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | void WriteBits(unsigned int val, unsigned int nBits) { | ||
| 44 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 45 | WriteBit((val >> i) & 1); | ||
| 46 | } | ||
| 47 | } | ||
| 48 | 32 | ||
| 49 | int GetBitsRead() const { | 33 | int GetBitsRead() const { |
| 50 | return m_BitsRead; | 34 | return m_BitsRead; |
| @@ -71,6 +55,38 @@ public: | |||
| 71 | } | 55 | } |
| 72 | 56 | ||
| 73 | private: | 57 | private: |
| 58 | const int m_NumBits; | ||
| 59 | const unsigned char* m_CurByte; | ||
| 60 | int m_NextBit = 0; | ||
| 61 | int m_BitsRead = 0; | ||
| 62 | |||
| 63 | bool done = false; | ||
| 64 | }; | ||
| 65 | |||
| 66 | class OutputBitStream { | ||
| 67 | public: | ||
| 68 | explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) | ||
| 69 | : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} | ||
| 70 | |||
| 71 | ~OutputBitStream() = default; | ||
| 72 | |||
| 73 | int GetBitsWritten() const { | ||
| 74 | return m_BitsWritten; | ||
| 75 | } | ||
| 76 | |||
| 77 | void WriteBitsR(unsigned int val, unsigned int nBits) { | ||
| 78 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 79 | WriteBit((val >> (nBits - i - 1)) & 1); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 | void WriteBits(unsigned int val, unsigned int nBits) { | ||
| 84 | for (unsigned int i = 0; i < nBits; i++) { | ||
| 85 | WriteBit((val >> i) & 1); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | private: | ||
| 74 | void WriteBit(int b) { | 90 | void WriteBit(int b) { |
| 75 | 91 | ||
| 76 | if (done) | 92 | if (done) |
| @@ -238,8 +254,8 @@ public: | |||
| 238 | // Fills result with the values that are encoded in the given | 254 | // Fills result with the values that are encoded in the given |
| 239 | // bitstream. We must know beforehand what the maximum possible | 255 | // bitstream. We must know beforehand what the maximum possible |
| 240 | // value is, and how many values we're decoding. | 256 | // value is, and how many values we're decoding. |
| 241 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits, | 257 | static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, |
| 242 | uint32_t maxRange, uint32_t nValues) { | 258 | InputBitStream& bits, uint32_t maxRange, uint32_t nValues) { |
| 243 | // Determine encoding parameters | 259 | // Determine encoding parameters |
| 244 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); | 260 | IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); |
| 245 | 261 | ||
| @@ -267,7 +283,7 @@ public: | |||
| 267 | } | 283 | } |
| 268 | 284 | ||
| 269 | private: | 285 | private: |
| 270 | static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, | 286 | static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 271 | uint32_t nBitsPerValue) { | 287 | uint32_t nBitsPerValue) { |
| 272 | // Implement the algorithm in section C.2.12 | 288 | // Implement the algorithm in section C.2.12 |
| 273 | uint32_t m[5]; | 289 | uint32_t m[5]; |
| @@ -327,7 +343,7 @@ private: | |||
| 327 | } | 343 | } |
| 328 | } | 344 | } |
| 329 | 345 | ||
| 330 | static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result, | 346 | static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, |
| 331 | uint32_t nBitsPerValue) { | 347 | uint32_t nBitsPerValue) { |
| 332 | // Implement the algorithm in section C.2.12 | 348 | // Implement the algorithm in section C.2.12 |
| 333 | uint32_t m[3]; | 349 | uint32_t m[3]; |
| @@ -406,7 +422,7 @@ struct TexelWeightParams { | |||
| 406 | } | 422 | } |
| 407 | }; | 423 | }; |
| 408 | 424 | ||
| 409 | static TexelWeightParams DecodeBlockInfo(BitStream& strm) { | 425 | static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { |
| 410 | TexelWeightParams params; | 426 | TexelWeightParams params; |
| 411 | 427 | ||
| 412 | // Read the entire block mode all at once | 428 | // Read the entire block mode all at once |
| @@ -605,7 +621,7 @@ static TexelWeightParams DecodeBlockInfo(BitStream& strm) { | |||
| 605 | return params; | 621 | return params; |
| 606 | } | 622 | } |
| 607 | 623 | ||
| 608 | static void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, | 624 | static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, |
| 609 | uint32_t blockHeight) { | 625 | uint32_t blockHeight) { |
| 610 | // Don't actually care about the void extent, just read the bits... | 626 | // Don't actually care about the void extent, just read the bits... |
| 611 | for (int i = 0; i < 4; ++i) { | 627 | for (int i = 0; i < 4; ++i) { |
| @@ -821,7 +837,7 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode | |||
| 821 | 837 | ||
| 822 | // We now have enough to decode our integer sequence. | 838 | // We now have enough to decode our integer sequence. |
| 823 | std::vector<IntegerEncodedValue> decodedColorValues; | 839 | std::vector<IntegerEncodedValue> decodedColorValues; |
| 824 | BitStream colorStream(data); | 840 | InputBitStream colorStream(data); |
| 825 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); | 841 | IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); |
| 826 | 842 | ||
| 827 | // Once we have the decoded values, we need to dequantize them to the 0-255 range | 843 | // Once we have the decoded values, we need to dequantize them to the 0-255 range |
| @@ -1365,9 +1381,9 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue | |||
| 1365 | #undef READ_INT_VALUES | 1381 | #undef READ_INT_VALUES |
| 1366 | } | 1382 | } |
| 1367 | 1383 | ||
| 1368 | static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | 1384 | static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, |
| 1369 | const uint32_t blockHeight, uint32_t* outBuf) { | 1385 | const uint32_t blockHeight, uint32_t* outBuf) { |
| 1370 | BitStream strm(inBuf); | 1386 | InputBitStream strm(inBuf); |
| 1371 | TexelWeightParams weightParams = DecodeBlockInfo(strm); | 1387 | TexelWeightParams weightParams = DecodeBlockInfo(strm); |
| 1372 | 1388 | ||
| 1373 | // Was there an error? | 1389 | // Was there an error? |
| @@ -1421,7 +1437,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1421 | // Define color data. | 1437 | // Define color data. |
| 1422 | uint8_t colorEndpointData[16]; | 1438 | uint8_t colorEndpointData[16]; |
| 1423 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); | 1439 | memset(colorEndpointData, 0, sizeof(colorEndpointData)); |
| 1424 | BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); | 1440 | OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); |
| 1425 | 1441 | ||
| 1426 | // Read extra config data... | 1442 | // Read extra config data... |
| 1427 | uint32_t baseCEM = 0; | 1443 | uint32_t baseCEM = 0; |
| @@ -1549,7 +1565,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1549 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); | 1565 | memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); |
| 1550 | 1566 | ||
| 1551 | std::vector<IntegerEncodedValue> texelWeightValues; | 1567 | std::vector<IntegerEncodedValue> texelWeightValues; |
| 1552 | BitStream weightStream(texelWeightData); | 1568 | InputBitStream weightStream(texelWeightData); |
| 1553 | 1569 | ||
| 1554 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, | 1570 | IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, |
| 1555 | weightParams.m_MaxWeight, | 1571 | weightParams.m_MaxWeight, |
| @@ -1597,7 +1613,7 @@ static void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, | |||
| 1597 | 1613 | ||
| 1598 | namespace Tegra::Texture::ASTC { | 1614 | namespace Tegra::Texture::ASTC { |
| 1599 | 1615 | ||
| 1600 | std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, | 1616 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, |
| 1601 | uint32_t depth, uint32_t block_width, uint32_t block_height) { | 1617 | uint32_t depth, uint32_t block_width, uint32_t block_height) { |
| 1602 | uint32_t blockIdx = 0; | 1618 | uint32_t blockIdx = 0; |
| 1603 | std::vector<uint8_t> outData(height * width * depth * 4); | 1619 | std::vector<uint8_t> outData(height * width * depth * 4); |
| @@ -1605,7 +1621,7 @@ std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint | |||
| 1605 | for (uint32_t j = 0; j < height; j += block_height) { | 1621 | for (uint32_t j = 0; j < height; j += block_height) { |
| 1606 | for (uint32_t i = 0; i < width; i += block_width) { | 1622 | for (uint32_t i = 0; i < width; i += block_width) { |
| 1607 | 1623 | ||
| 1608 | uint8_t* blockPtr = data.data() + blockIdx * 16; | 1624 | const uint8_t* blockPtr = data + blockIdx * 16; |
| 1609 | 1625 | ||
| 1610 | // Blocks can be at most 12x12 | 1626 | // Blocks can be at most 12x12 |
| 1611 | uint32_t uncompData[144]; | 1627 | uint32_t uncompData[144]; |
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h index d419dd025..991cdba72 100644 --- a/src/video_core/textures/astc.h +++ b/src/video_core/textures/astc.h | |||
| @@ -9,7 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | namespace Tegra::Texture::ASTC { | 10 | namespace Tegra::Texture::ASTC { |
| 11 | 11 | ||
| 12 | std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height, | 12 | std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, |
| 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); | 13 | uint32_t depth, uint32_t block_width, uint32_t block_height); |
| 14 | 14 | ||
| 15 | } // namespace Tegra::Texture::ASTC | 15 | } // namespace Tegra::Texture::ASTC |
diff --git a/src/video_core/textures/convert.cpp b/src/video_core/textures/convert.cpp new file mode 100644 index 000000000..5e439f036 --- /dev/null +++ b/src/video_core/textures/convert.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstring> | ||
| 7 | #include <tuple> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/textures/astc.h" | ||
| 14 | #include "video_core/textures/convert.h" | ||
| 15 | |||
| 16 | namespace Tegra::Texture { | ||
| 17 | |||
| 18 | using VideoCore::Surface::PixelFormat; | ||
| 19 | |||
| 20 | template <bool reverse> | ||
| 21 | void SwapS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 22 | union S8Z24 { | ||
| 23 | BitField<0, 24, u32> z24; | ||
| 24 | BitField<24, 8, u32> s8; | ||
| 25 | }; | ||
| 26 | static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); | ||
| 27 | |||
| 28 | union Z24S8 { | ||
| 29 | BitField<0, 8, u32> s8; | ||
| 30 | BitField<8, 24, u32> z24; | ||
| 31 | }; | ||
| 32 | static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); | ||
| 33 | |||
| 34 | S8Z24 s8z24_pixel{}; | ||
| 35 | Z24S8 z24s8_pixel{}; | ||
| 36 | constexpr auto bpp{ | ||
| 37 | VideoCore::Surface::GetBytesPerPixel(VideoCore::Surface::PixelFormat::S8Z24)}; | ||
| 38 | for (std::size_t y = 0; y < height; ++y) { | ||
| 39 | for (std::size_t x = 0; x < width; ++x) { | ||
| 40 | const std::size_t offset{bpp * (y * width + x)}; | ||
| 41 | if constexpr (reverse) { | ||
| 42 | std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); | ||
| 43 | s8z24_pixel.s8.Assign(z24s8_pixel.s8); | ||
| 44 | s8z24_pixel.z24.Assign(z24s8_pixel.z24); | ||
| 45 | std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); | ||
| 46 | } else { | ||
| 47 | std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); | ||
| 48 | z24s8_pixel.s8.Assign(s8z24_pixel.s8); | ||
| 49 | z24s8_pixel.z24.Assign(s8z24_pixel.z24); | ||
| 50 | std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); | ||
| 51 | } | ||
| 52 | } | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | static void ConvertS8Z24ToZ24S8(u8* data, u32 width, u32 height) { | ||
| 57 | SwapS8Z24ToZ24S8<false>(data, width, height); | ||
| 58 | } | ||
| 59 | |||
| 60 | static void ConvertZ24S8ToS8Z24(u8* data, u32 width, u32 height) { | ||
| 61 | SwapS8Z24ToZ24S8<true>(data, width, height); | ||
| 62 | } | ||
| 63 | |||
| 64 | void ConvertFromGuestToHost(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 65 | bool convert_astc, bool convert_s8z24) { | ||
| 66 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 67 | // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. | ||
| 68 | u32 block_width{}; | ||
| 69 | u32 block_height{}; | ||
| 70 | std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); | ||
| 71 | const std::vector<u8> rgba8_data = | ||
| 72 | Tegra::Texture::ASTC::Decompress(data, width, height, depth, block_width, block_height); | ||
| 73 | std::copy(rgba8_data.begin(), rgba8_data.end(), data); | ||
| 74 | |||
| 75 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { | ||
| 76 | Tegra::Texture::ConvertS8Z24ToZ24S8(data, width, height); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | |||
| 80 | void ConvertFromHostToGuest(u8* data, PixelFormat pixel_format, u32 width, u32 height, u32 depth, | ||
| 81 | bool convert_astc, bool convert_s8z24) { | ||
| 82 | if (convert_astc && IsPixelFormatASTC(pixel_format)) { | ||
| 83 | LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", | ||
| 84 | static_cast<u32>(pixel_format)); | ||
| 85 | UNREACHABLE(); | ||
| 86 | |||
| 87 | } else if (convert_s8z24 && pixel_format == PixelFormat::S8Z24) { | ||
| 88 | Tegra::Texture::ConvertZ24S8ToS8Z24(data, width, height); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Tegra::Texture \ No newline at end of file | ||
diff --git a/src/video_core/textures/convert.h b/src/video_core/textures/convert.h new file mode 100644 index 000000000..07cd8b5da --- /dev/null +++ b/src/video_core/textures/convert.h | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "video_core/surface.h" | ||
| 9 | |||
| 10 | namespace Tegra::Texture { | ||
| 11 | |||
| 12 | void ConvertFromGuestToHost(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 13 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 14 | |||
| 15 | void ConvertFromHostToGuest(u8* data, VideoCore::Surface::PixelFormat pixel_format, u32 width, | ||
| 16 | u32 height, u32 depth, bool convert_astc, bool convert_s8z24); | ||
| 17 | |||
| 18 | } // namespace Tegra::Texture \ No newline at end of file | ||
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 5db75de22..cad7340f5 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp | |||
| @@ -103,8 +103,8 @@ void FastProcessBlock(u8* const swizzled_data, u8* const unswizzled_data, const | |||
| 103 | const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; | 103 | const u32 swizzle_offset{y_address + table[(xb / fast_swizzle_align) % 4]}; |
| 104 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; | 104 | const u32 out_x = xb * out_bytes_per_pixel / bytes_per_pixel; |
| 105 | const u32 pixel_index{out_x + pixel_base}; | 105 | const u32 pixel_index{out_x + pixel_base}; |
| 106 | data_ptrs[unswizzle] = swizzled_data + swizzle_offset; | 106 | data_ptrs[unswizzle ? 1 : 0] = swizzled_data + swizzle_offset; |
| 107 | data_ptrs[!unswizzle] = unswizzled_data + pixel_index; | 107 | data_ptrs[unswizzle ? 0 : 1] = unswizzled_data + pixel_index; |
| 108 | std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); | 108 | std::memcpy(data_ptrs[0], data_ptrs[1], fast_swizzle_align); |
| 109 | } | 109 | } |
| 110 | pixel_base += stride_x; | 110 | pixel_base += stride_x; |
| @@ -154,7 +154,7 @@ void SwizzledData(u8* const swizzled_data, u8* const unswizzled_data, const bool | |||
| 154 | for (u32 xb = 0; xb < blocks_on_x; xb++) { | 154 | for (u32 xb = 0; xb < blocks_on_x; xb++) { |
| 155 | const u32 x_start = xb * block_x_elements; | 155 | const u32 x_start = xb * block_x_elements; |
| 156 | const u32 x_end = std::min(width, x_start + block_x_elements); | 156 | const u32 x_end = std::min(width, x_start + block_x_elements); |
| 157 | if (fast) { | 157 | if constexpr (fast) { |
| 158 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, | 158 | FastProcessBlock(swizzled_data, unswizzled_data, unswizzle, x_start, y_start, |
| 159 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, | 159 | z_start, x_end, y_end, z_end, tile_offset, xy_block_size, |
| 160 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); | 160 | layer_z, stride_x, bytes_per_pixel, out_bytes_per_pixel); |
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 85b7e9f7b..65df86890 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h | |||
| @@ -16,16 +16,13 @@ inline std::size_t GetGOBSize() { | |||
| 16 | return 512; | 16 | return 512; |
| 17 | } | 17 | } |
| 18 | 18 | ||
| 19 | /** | 19 | /// Unswizzles a swizzled texture without changing its format. |
| 20 | * Unswizzles a swizzled texture without changing its format. | ||
| 21 | */ | ||
| 22 | void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, | 20 | void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, |
| 23 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 21 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 24 | u32 block_height = TICEntry::DefaultBlockHeight, | 22 | u32 block_height = TICEntry::DefaultBlockHeight, |
| 25 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); | 23 | u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); |
| 26 | /** | 24 | |
| 27 | * Unswizzles a swizzled texture without changing its format. | 25 | /// Unswizzles a swizzled texture without changing its format. |
| 28 | */ | ||
| 29 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, | 26 | std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, |
| 30 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 27 | u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 31 | u32 block_height = TICEntry::DefaultBlockHeight, | 28 | u32 block_height = TICEntry::DefaultBlockHeight, |
| @@ -37,15 +34,11 @@ void CopySwizzledData(u32 width, u32 height, u32 depth, u32 bytes_per_pixel, | |||
| 37 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, | 34 | u32 out_bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, |
| 38 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); | 35 | bool unswizzle, u32 block_height, u32 block_depth, u32 width_spacing); |
| 39 | 36 | ||
| 40 | /** | 37 | /// Decodes an unswizzled texture into a A8R8G8B8 texture. |
| 41 | * Decodes an unswizzled texture into a A8R8G8B8 texture. | ||
| 42 | */ | ||
| 43 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | 38 | std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, |
| 44 | u32 height); | 39 | u32 height); |
| 45 | 40 | ||
| 46 | /** | 41 | /// This function calculates the correct size of a texture depending if it's tiled or not. |
| 47 | * This function calculates the correct size of a texture depending if it's tiled or not. | ||
| 48 | */ | ||
| 49 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, | 42 | std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, |
| 50 | u32 block_height, u32 block_depth); | 43 | u32 block_height, u32 block_depth); |
| 51 | 44 | ||
| @@ -53,6 +46,7 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | |||
| 53 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, | 46 | void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, |
| 54 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 47 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |
| 55 | u32 block_height); | 48 | u32 block_height); |
| 49 | |||
| 56 | /// Copies a tiled subrectangle into a linear surface. | 50 | /// Copies a tiled subrectangle into a linear surface. |
| 57 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, | 51 | void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, |
| 58 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, | 52 | u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, |
diff --git a/src/yuzu/applets/web_browser.cpp b/src/yuzu/applets/web_browser.cpp index 6a9138d53..979b9ec14 100644 --- a/src/yuzu/applets/web_browser.cpp +++ b/src/yuzu/applets/web_browser.cpp | |||
| @@ -56,6 +56,8 @@ constexpr char NX_SHIM_INJECT_SCRIPT[] = R"( | |||
| 56 | window.nx.endApplet = function() { | 56 | window.nx.endApplet = function() { |
| 57 | applet_done = true; | 57 | applet_done = true; |
| 58 | }; | 58 | }; |
| 59 | |||
| 60 | window.onkeypress = function(e) { if (e.keyCode === 13) { applet_done = true; } }; | ||
| 59 | )"; | 61 | )"; |
| 60 | 62 | ||
| 61 | QString GetNXShimInjectionScript() { | 63 | QString GetNXShimInjectionScript() { |
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 73b04b749..3b070bfbb 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -20,10 +20,7 @@ | |||
| 20 | EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} | 20 | EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} |
| 21 | 21 | ||
| 22 | void EmuThread::run() { | 22 | void EmuThread::run() { |
| 23 | if (!Settings::values.use_multi_core) { | 23 | render_window->MakeCurrent(); |
| 24 | // Single core mode must acquire OpenGL context for entire emulation session | ||
| 25 | render_window->MakeCurrent(); | ||
| 26 | } | ||
| 27 | 24 | ||
| 28 | MicroProfileOnThreadCreate("EmuThread"); | 25 | MicroProfileOnThreadCreate("EmuThread"); |
| 29 | 26 | ||
| @@ -38,6 +35,11 @@ void EmuThread::run() { | |||
| 38 | 35 | ||
| 39 | emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); | 36 | emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); |
| 40 | 37 | ||
| 38 | if (Settings::values.use_asynchronous_gpu_emulation) { | ||
| 39 | // Release OpenGL context for the GPU thread | ||
| 40 | render_window->DoneCurrent(); | ||
| 41 | } | ||
| 42 | |||
| 41 | // holds whether the cpu was running during the last iteration, | 43 | // holds whether the cpu was running during the last iteration, |
| 42 | // so that the DebugModeLeft signal can be emitted before the | 44 | // so that the DebugModeLeft signal can be emitted before the |
| 43 | // next execution step | 45 | // next execution step |
diff --git a/src/yuzu/compatdb.cpp b/src/yuzu/compatdb.cpp index c09a06520..c8b0a5ec0 100644 --- a/src/yuzu/compatdb.cpp +++ b/src/yuzu/compatdb.cpp | |||
| @@ -53,8 +53,8 @@ void CompatDB::Submit() { | |||
| 53 | case CompatDBPage::Final: | 53 | case CompatDBPage::Final: |
| 54 | back(); | 54 | back(); |
| 55 | LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); | 55 | LOG_DEBUG(Frontend, "Compatibility Rating: {}", compatibility->checkedId()); |
| 56 | Core::Telemetry().AddField(Telemetry::FieldType::UserFeedback, "Compatibility", | 56 | Core::System::GetInstance().TelemetrySession().AddField( |
| 57 | compatibility->checkedId()); | 57 | Telemetry::FieldType::UserFeedback, "Compatibility", compatibility->checkedId()); |
| 58 | 58 | ||
| 59 | button(NextButton)->setEnabled(false); | 59 | button(NextButton)->setEnabled(false); |
| 60 | button(NextButton)->setText(tr("Submitting")); | 60 | button(NextButton)->setText(tr("Submitting")); |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9546dadf..74dc6bb28 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -374,6 +374,8 @@ void Config::ReadValues() { | |||
| 374 | qt_config->value("use_disk_shader_cache", false).toBool(); | 374 | qt_config->value("use_disk_shader_cache", false).toBool(); |
| 375 | Settings::values.use_accurate_gpu_emulation = | 375 | Settings::values.use_accurate_gpu_emulation = |
| 376 | qt_config->value("use_accurate_gpu_emulation", false).toBool(); | 376 | qt_config->value("use_accurate_gpu_emulation", false).toBool(); |
| 377 | Settings::values.use_asynchronous_gpu_emulation = | ||
| 378 | qt_config->value("use_asynchronous_gpu_emulation", false).toBool(); | ||
| 377 | 379 | ||
| 378 | Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); | 380 | Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); |
| 379 | Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); | 381 | Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); |
| @@ -633,6 +635,8 @@ void Config::SaveValues() { | |||
| 633 | qt_config->setValue("frame_limit", Settings::values.frame_limit); | 635 | qt_config->setValue("frame_limit", Settings::values.frame_limit); |
| 634 | qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); | 636 | qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); |
| 635 | qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); | 637 | qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); |
| 638 | qt_config->setValue("use_asynchronous_gpu_emulation", | ||
| 639 | Settings::values.use_asynchronous_gpu_emulation); | ||
| 636 | 640 | ||
| 637 | // Cast to double because Qt's written float values are not human-readable | 641 | // Cast to double because Qt's written float values are not human-readable |
| 638 | qt_config->setValue("bg_red", (double)Settings::values.bg_red); | 642 | qt_config->setValue("bg_red", (double)Settings::values.bg_red); |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 0f5dd534b..dd1d67488 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() { | |||
| 75 | ui->frame_limit->setValue(Settings::values.frame_limit); | 75 | ui->frame_limit->setValue(Settings::values.frame_limit); |
| 76 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); | 76 | ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); |
| 77 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | 77 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); |
| 78 | ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn()); | ||
| 79 | ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); | ||
| 78 | UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, | 80 | UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, |
| 79 | Settings::values.bg_blue)); | 81 | Settings::values.bg_blue)); |
| 80 | } | 82 | } |
| @@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() { | |||
| 86 | Settings::values.frame_limit = ui->frame_limit->value(); | 88 | Settings::values.frame_limit = ui->frame_limit->value(); |
| 87 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); | 89 | Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); |
| 88 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | 90 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); |
| 91 | Settings::values.use_asynchronous_gpu_emulation = | ||
| 92 | ui->use_asynchronous_gpu_emulation->isChecked(); | ||
| 89 | Settings::values.bg_red = static_cast<float>(bg_color.redF()); | 93 | Settings::values.bg_red = static_cast<float>(bg_color.redF()); |
| 90 | Settings::values.bg_green = static_cast<float>(bg_color.greenF()); | 94 | Settings::values.bg_green = static_cast<float>(bg_color.greenF()); |
| 91 | Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); | 95 | Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); |
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 824f5810a..c6767e0ca 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui | |||
| @@ -64,6 +64,13 @@ | |||
| 64 | </widget> | 64 | </widget> |
| 65 | </item> | 65 | </item> |
| 66 | <item> | 66 | <item> |
| 67 | <widget class="QCheckBox" name="use_asynchronous_gpu_emulation"> | ||
| 68 | <property name="text"> | ||
| 69 | <string>Use asynchronous GPU emulation</string> | ||
| 70 | </property> | ||
| 71 | </widget> | ||
| 72 | </item> | ||
| 73 | <item> | ||
| 67 | <layout class="QHBoxLayout" name="horizontalLayout"> | 74 | <layout class="QHBoxLayout" name="horizontalLayout"> |
| 68 | <item> | 75 | <item> |
| 69 | <widget class="QLabel" name="label"> | 76 | <widget class="QLabel" name="label"> |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 5ab7896d4..41ba3c4c6 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "applets/profile_select.h" | 11 | #include "applets/profile_select.h" |
| 12 | #include "applets/software_keyboard.h" | 12 | #include "applets/software_keyboard.h" |
| 13 | #include "applets/web_browser.h" | 13 | #include "applets/web_browser.h" |
| 14 | #include "configuration/configure_input.h" | ||
| 14 | #include "configuration/configure_per_general.h" | 15 | #include "configuration/configure_per_general.h" |
| 15 | #include "core/file_sys/vfs.h" | 16 | #include "core/file_sys/vfs.h" |
| 16 | #include "core/file_sys/vfs_real.h" | 17 | #include "core/file_sys/vfs_real.h" |
| @@ -339,6 +340,11 @@ void GMainWindow::WebBrowserOpenPage(std::string_view filename, std::string_view | |||
| 339 | .arg(QString::fromStdString(std::to_string(key_code)))); | 340 | .arg(QString::fromStdString(std::to_string(key_code)))); |
| 340 | }; | 341 | }; |
| 341 | 342 | ||
| 343 | QMessageBox::information( | ||
| 344 | this, tr("Exit"), | ||
| 345 | tr("To exit the web application, use the game provided controls to select exit, select the " | ||
| 346 | "'Exit Web Applet' option in the menu bar, or press the 'Enter' key.")); | ||
| 347 | |||
| 342 | bool running_exit_check = false; | 348 | bool running_exit_check = false; |
| 343 | while (!finished) { | 349 | while (!finished) { |
| 344 | QApplication::processEvents(); | 350 | QApplication::processEvents(); |
| @@ -522,6 +528,7 @@ void GMainWindow::InitializeHotkeys() { | |||
| 522 | Qt::ApplicationShortcut); | 528 | Qt::ApplicationShortcut); |
| 523 | hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", | 529 | hotkey_registry.RegisterHotkey("Main Window", "Capture Screenshot", |
| 524 | QKeySequence(QKeySequence::Print)); | 530 | QKeySequence(QKeySequence::Print)); |
| 531 | hotkey_registry.RegisterHotkey("Main Window", "Change Docked Mode", QKeySequence(Qt::Key_F10)); | ||
| 525 | 532 | ||
| 526 | hotkey_registry.LoadHotkeys(); | 533 | hotkey_registry.LoadHotkeys(); |
| 527 | 534 | ||
| @@ -591,6 +598,12 @@ void GMainWindow::InitializeHotkeys() { | |||
| 591 | OnCaptureScreenshot(); | 598 | OnCaptureScreenshot(); |
| 592 | } | 599 | } |
| 593 | }); | 600 | }); |
| 601 | connect(hotkey_registry.GetHotkey("Main Window", "Change Docked Mode", this), | ||
| 602 | &QShortcut::activated, this, [&] { | ||
| 603 | Settings::values.use_docked_mode = !Settings::values.use_docked_mode; | ||
| 604 | OnDockedModeChanged(!Settings::values.use_docked_mode, | ||
| 605 | Settings::values.use_docked_mode); | ||
| 606 | }); | ||
| 594 | } | 607 | } |
| 595 | 608 | ||
| 596 | void GMainWindow::SetDefaultUIGeometry() { | 609 | void GMainWindow::SetDefaultUIGeometry() { |
| @@ -849,7 +862,7 @@ bool GMainWindow::LoadROM(const QString& filename) { | |||
| 849 | } | 862 | } |
| 850 | game_path = filename; | 863 | game_path = filename; |
| 851 | 864 | ||
| 852 | Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); | 865 | system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "Qt"); |
| 853 | return true; | 866 | return true; |
| 854 | } | 867 | } |
| 855 | 868 | ||
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index ff05b3179..32e78049c 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -346,7 +346,7 @@ void Config::ReadValues() { | |||
| 346 | 346 | ||
| 347 | // Renderer | 347 | // Renderer |
| 348 | Settings::values.resolution_factor = | 348 | Settings::values.resolution_factor = |
| 349 | (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0); | 349 | static_cast<float>(sdl2_config->GetReal("Renderer", "resolution_factor", 1.0)); |
| 350 | Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); | 350 | Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); |
| 351 | Settings::values.frame_limit = | 351 | Settings::values.frame_limit = |
| 352 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); | 352 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); |
| @@ -354,17 +354,20 @@ void Config::ReadValues() { | |||
| 354 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); | 354 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); |
| 355 | Settings::values.use_accurate_gpu_emulation = | 355 | Settings::values.use_accurate_gpu_emulation = |
| 356 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | 356 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); |
| 357 | Settings::values.use_asynchronous_gpu_emulation = | ||
| 358 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | ||
| 357 | 359 | ||
| 358 | Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); | 360 | Settings::values.bg_red = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_red", 0.0)); |
| 359 | Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); | 361 | Settings::values.bg_green = |
| 360 | Settings::values.bg_blue = (float)sdl2_config->GetReal("Renderer", "bg_blue", 0.0); | 362 | static_cast<float>(sdl2_config->GetReal("Renderer", "bg_green", 0.0)); |
| 363 | Settings::values.bg_blue = static_cast<float>(sdl2_config->GetReal("Renderer", "bg_blue", 0.0)); | ||
| 361 | 364 | ||
| 362 | // Audio | 365 | // Audio |
| 363 | Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); | 366 | Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto"); |
| 364 | Settings::values.enable_audio_stretching = | 367 | Settings::values.enable_audio_stretching = |
| 365 | sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); | 368 | sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true); |
| 366 | Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); | 369 | Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto"); |
| 367 | Settings::values.volume = sdl2_config->GetReal("Audio", "volume", 1); | 370 | Settings::values.volume = static_cast<float>(sdl2_config->GetReal("Audio", "volume", 1)); |
| 368 | 371 | ||
| 369 | Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); | 372 | Settings::values.language_index = sdl2_config->GetInteger("System", "language_index", 1); |
| 370 | 373 | ||
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index a81986f8e..6538af098 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -118,6 +118,10 @@ use_disk_shader_cache = | |||
| 118 | # 0 (default): Off (fast), 1 : On (slow) | 118 | # 0 (default): Off (fast), 1 : On (slow) |
| 119 | use_accurate_gpu_emulation = | 119 | use_accurate_gpu_emulation = |
| 120 | 120 | ||
| 121 | # Whether to use asynchronous GPU emulation | ||
| 122 | # 0 : Off (slow), 1 (default): On (fast) | ||
| 123 | use_asynchronous_gpu_emulation = | ||
| 124 | |||
| 121 | # The clear color for the renderer. What shows up on the sides of the bottom screen. | 125 | # The clear color for the renderer. What shows up on the sides of the bottom screen. |
| 122 | # Must be in range of 0.0-1.0. Defaults to 1.0 for all. | 126 | # Must be in range of 0.0-1.0. Defaults to 1.0 for all. |
| 123 | bg_red = | 127 | bg_red = |
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index c34b5467f..c6c66a787 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp | |||
| @@ -216,7 +216,7 @@ int main(int argc, char** argv) { | |||
| 216 | } | 216 | } |
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | Core::Telemetry().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); | 219 | system.TelemetrySession().AddField(Telemetry::FieldType::App, "Frontend", "SDL"); |
| 220 | 220 | ||
| 221 | system.Renderer().Rasterizer().LoadDiskResources(); | 221 | system.Renderer().Rasterizer().LoadDiskResources(); |
| 222 | 222 | ||